Repository: timescale/timescaledb-toolkit Branch: main Commit: 97f11d12d5da Files: 195 Total size: 2.3 MB Directory structure: gitextract_o1olp053/ ├── .cargo/ │ └── config ├── .dockerignore ├── .git-blame-ignore-revs ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug-report.md │ │ ├── feature-request.md │ │ ├── feature-stabilization.md │ │ └── proposed-feature.md │ └── workflows/ │ ├── add-to-bugs-board.yml │ ├── ci.yml │ ├── ci_image_build.yml │ ├── clippy_rustfmt.yml │ ├── dependency-updates.yml │ ├── packaging.yml │ ├── release.yml │ └── report_packaging_failures.yml ├── .gitignore ├── Cargo.toml ├── Changelog.md ├── LICENSE ├── NOTICE ├── Readme.md ├── crates/ │ ├── aggregate_builder/ │ │ ├── Cargo.toml │ │ ├── Readme.md │ │ └── src/ │ │ └── lib.rs │ ├── asap/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── fft.rs │ │ └── lib.rs │ ├── count-min-sketch/ │ │ ├── Cargo.toml │ │ ├── src/ │ │ │ └── lib.rs │ │ └── tests/ │ │ └── lib.rs │ ├── counter-agg/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── lib.rs │ │ ├── range.rs │ │ └── tests.rs │ ├── encodings/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── flat_serialize/ │ │ ├── Readme.md │ │ ├── example_generated.rs │ │ ├── flat_serialize/ │ │ │ ├── Cargo.toml │ │ │ └── src/ │ │ │ └── lib.rs │ │ └── flat_serialize_macro/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── lib.rs │ │ └── parser.rs │ ├── hyperloglogplusplus/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── dense.rs │ │ ├── hyperloglog_data.rs │ │ ├── lib.rs │ │ ├── registers.rs │ │ ├── sparse/ │ │ │ └── varint.rs │ │ └── sparse.rs │ ├── scripting-utilities/ │ │ ├── Readme.md │ │ ├── control_file_reader/ │ │ │ ├── Cargo.toml │ │ │ └── src/ │ │ │ └── lib.rs │ │ └── postgres_connection_configuration/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── stats-agg/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── lib.rs │ │ ├── stats1d.rs │ │ ├── stats2d/ │ │ │ └── stats2d_flat_serialize.rs │ │ └── stats2d.rs │ ├── t-digest/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── t-digest-lib/ │ │ ├── 
Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── time-weighted-average/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── tspoint/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ └── udd-sketch/ │ ├── Cargo.toml │ └── src/ │ └── lib.rs ├── docker/ │ ├── README.md │ └── ci/ │ ├── Dockerfile │ └── setup.sh ├── docs/ │ ├── README.md │ ├── asap.md │ ├── client.md │ ├── counter_agg.md │ ├── examples/ │ │ ├── tdigest.c │ │ └── tdigest.py │ ├── gauge_agg.md │ ├── hyperloglog.md │ ├── lttb.md │ ├── ordered-aggregates.md │ ├── percentile_approximation.md │ ├── release.md │ ├── rolling_average_api_working.md │ ├── state_agg.md │ ├── stats_agg.md │ ├── tdigest.md │ ├── template.md │ ├── test_caggs.md │ ├── test_candlestick_agg.md │ ├── time_weighted_average.md │ ├── timeseries.md │ ├── timeseries_pipeline_elements.md │ ├── two-step_aggregation.md │ └── uddsketch.md ├── extension/ │ ├── .gitignore │ ├── Cargo.toml │ ├── src/ │ │ ├── accessors/ │ │ │ └── tests.rs │ │ ├── accessors.rs │ │ ├── aggregate_builder_tests.rs │ │ ├── aggregate_utils.rs │ │ ├── asap.rs │ │ ├── bin/ │ │ │ └── pgrx_embed.rs │ │ ├── candlestick.rs │ │ ├── counter_agg/ │ │ │ └── accessors.rs │ │ ├── counter_agg.rs │ │ ├── countminsketch.rs │ │ ├── datum_utils.rs │ │ ├── duration.rs │ │ ├── frequency.rs │ │ ├── gauge_agg.rs │ │ ├── heartbeat_agg/ │ │ │ └── accessors.rs │ │ ├── heartbeat_agg.rs │ │ ├── hyperloglog.rs │ │ ├── lib.rs │ │ ├── lttb.rs │ │ ├── nmost/ │ │ │ ├── max_by_float.rs │ │ │ ├── max_by_int.rs │ │ │ ├── max_by_time.rs │ │ │ ├── max_float.rs │ │ │ ├── max_int.rs │ │ │ ├── max_time.rs │ │ │ ├── min_by_float.rs │ │ │ ├── min_by_int.rs │ │ │ ├── min_by_time.rs │ │ │ ├── min_float.rs │ │ │ ├── min_int.rs │ │ │ └── min_time.rs │ │ ├── nmost.rs │ │ ├── palloc.rs │ │ ├── pg_any_element.rs │ │ ├── range.rs │ │ ├── raw.rs │ │ ├── saturation.rs │ │ ├── serialization/ │ │ │ ├── collations.rs │ │ │ ├── functions.rs │ │ │ └── types.rs │ │ ├── serialization.rs │ │ ├── stabilization_info.rs │ │ ├── 
stabilization_tests.rs │ │ ├── state_aggregate/ │ │ │ ├── accessors.rs │ │ │ └── rollup.rs │ │ ├── state_aggregate.rs │ │ ├── stats_agg.rs │ │ ├── tdigest.rs │ │ ├── time_vector/ │ │ │ ├── iter.rs │ │ │ ├── pipeline/ │ │ │ │ ├── aggregation.rs │ │ │ │ ├── arithmetic.rs │ │ │ │ ├── delta.rs │ │ │ │ ├── expansion.rs │ │ │ │ ├── fill_to.rs │ │ │ │ ├── filter.rs │ │ │ │ ├── lambda/ │ │ │ │ │ ├── executor.rs │ │ │ │ │ ├── lambda_expr.pest │ │ │ │ │ └── parser.rs │ │ │ │ ├── lambda.rs │ │ │ │ ├── map.rs │ │ │ │ └── sort.rs │ │ │ └── pipeline.rs │ │ ├── time_vector.rs │ │ ├── time_weighted_average/ │ │ │ └── accessors.rs │ │ ├── time_weighted_average.rs │ │ ├── type_builder.rs │ │ ├── uddsketch.rs │ │ └── utilities.rs │ └── timescaledb_toolkit.control ├── tests/ │ └── update/ │ ├── candlestick.md │ ├── heartbeat.md │ ├── original_update_tests.md │ ├── state_agg.md │ ├── time-vector.md │ └── time-weighted-average.md └── tools/ ├── build ├── dependencies.sh ├── install-timescaledb ├── post-install/ │ ├── Cargo.toml │ └── src/ │ ├── main.rs │ └── update_script.rs ├── release ├── sql-doctester/ │ ├── Cargo.toml │ ├── Readme.md │ └── src/ │ ├── main.rs │ ├── parser.rs │ ├── runner.rs │ └── startup.sql ├── testbin └── update-tester/ ├── Cargo.toml ├── Readme.md └── src/ ├── installer.rs ├── main.rs ├── parser.rs ├── testrunner/ │ └── stabilization.rs └── testrunner.rs ================================================ FILE CONTENTS ================================================ ================================================ FILE: .cargo/config ================================================ [build] # Postgres symbols won't be available until runtime rustflags = ["-C", "link-args=-Wl,-undefined,dynamic_lookup"] ================================================ FILE: .dockerignore ================================================ **/*.iml **/*.o **/.DS_Store .editorconfig .idea .vscode .vsls.json .git old-versions target target-analyzer 
================================================ FILE: .git-blame-ignore-revs ================================================ # Merge and parent commit for cargo fmt changes b7433344f90b142094e73e84c332385498db9335 8b50127c9e4bad1696a68a800ce1ef019cf6fc3c ================================================ FILE: .github/ISSUE_TEMPLATE/bug-report.md ================================================ --- name: Bug Report about: Something is not working as expected title: '' labels: bug assignees: '' --- **Relevant system information:** - OS: [e.g. Ubuntu 16.04, Windows 10 x64, etc] - PostgreSQL version (output of `SELECT version();`): [e.g. 12.0, 13.2, etc] - TimescaleDB Toolkit version (output of `\dx timescaledb_toolkit` in `psql`): [e.g. 1.0.0] - Installation method: [e.g., "Timescale Cloud", "docker", "source"] **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Expected behavior** A clear and concise description of what you _expected_ to happen. **Actual behavior** A clear and concise description of what _actually_ happened. **Screenshots** If applicable, add screenshots to help explain your problem. **Additional context** Add any other context about the problem here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature-request.md ================================================ --- name: Feature Request about: Suggest an idea for this project title: '' labels: feature-request assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you would like to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. 
**Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/ISSUE_TEMPLATE/feature-stabilization.md ================================================ --- name: Feature Stabilization about: Checklist of tasks to move a feature out of experimental title: '' labels: '' assignees: '' --- ## [\]() **What evidence do we have the feature is being used** **Why do we feel this feature is ready to be stable** **Is there any known further work needed on this feature after stabilization** **Are there any compatibility concerns that may arise during future work on this feature** ### Feature History - Experimental release version: - Last version modifying on-disk format: - Target stabilization version: ### Stabilization checklist: - [ ] Ensure tests exist for all public API - [ ] Ensure API documentation exists and is accurate - [ ] Remove `toolkit_experimental` tags and update test usages - [ ] Add arrow operators for accessors if applicable - [ ] Ensure arrow operators have test coverage - [ ] If present, ensure `combine` and `rollup` are tested - [ ] Add serialization tests for on disk format - [ ] Add upgrade tests - [ ] Add continuous aggregate test - [ ] Add feature level documentation ================================================ FILE: .github/ISSUE_TEMPLATE/proposed-feature.md ================================================ --- name: Proposed Feature about: Propose a solution to a problem or wishlist item title: '' labels: proposed-feature assignees: '' --- ## What's the functionality you would like to add ## A clear and concise description of what you want to happen. ## How would the function be used ## Give an example of what a workflow using the function would look like ## Why should this feature be added? ## A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] Is your feature request related to a problem? A wishlist item? 
### What scale is this useful at? ### Is this useful for large data sets? Small ones? Medium sized? ## Drawbacks ## Are there any issues with this particular solution to the problem? ## Open Questions ## Are there any questions we'd need to address before releasing this feature? ## Alternatives ## Are there any alternatives to the solutions chosen in the above text? Are there any other issues competing with this one? ================================================ FILE: .github/workflows/add-to-bugs-board.yml ================================================ name: Add bugs to bugs project "on": issues: types: [opened, labeled] issue_comment: types: [created, edited] jobs: add-to-project: name: Add issue to project runs-on: ubuntu-latest steps: - uses: actions/add-to-project@v1.0.2 with: project-url: https://github.com/orgs/timescale/projects/55 github-token: ${{ secrets.ORG_AUTOMATION_TOKEN }} waiting-for-author: name: Waiting for Author runs-on: ubuntu-latest if: github.event_name == 'issues' && github.event.action == 'labeled' && github.event.label.name == 'waiting-for-author' steps: - uses: leonsteinhaeuser/project-beta-automations@v2.2.1 with: gh_token: ${{ secrets.ORG_AUTOMATION_TOKEN }} organization: timescale project_id: 55 resource_node_id: ${{ github.event.issue.node_id }} status_value: 'Waiting for Author' waiting-for-engineering: name: Waiting for Engineering runs-on: ubuntu-latest if: github.event_name == 'issue_comment' && !github.event.issue.pull_request && contains(github.event.issue.labels.*.name, 'waiting-for-author') steps: - name: Check if organization member uses: tspascoal/get-user-teams-membership@v3 id: checkUserMember with: username: ${{ github.actor }} organization: timescale team: 'database-eng' GITHUB_TOKEN: ${{ secrets.ORG_AUTOMATION_TOKEN }} - name: Remove waiting-for-author label if: ${{ steps.checkUserMember.outputs.isTeamMember == 'false' }} uses: andymckay/labeler@3a4296e9dcdf9576b0456050db78cfd34853f260 with: remove-labels: 
'waiting-for-author, no-activity' repo-token: ${{ secrets.ORG_AUTOMATION_TOKEN }} - name: Move to waiting for engineering column if: ${{ steps.checkUserMember.outputs.isTeamMember == 'false' }} uses: leonsteinhaeuser/project-beta-automations@v2.2.1 with: gh_token: ${{ secrets.ORG_AUTOMATION_TOKEN }} organization: timescale project_id: 55 resource_node_id: ${{ github.event.issue.node_id }} status_value: 'Waiting for Engineering' ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: pull_request: push: branches: - main - staging - trying schedule: # TimescaleDB integration: 8am UTC, 3am Eastern, midnight Pacific - cron: '0 8 * * 1-4' # Testing on every platform: 6am UTC, 1am Eastern, 10pm Pacific - cron: '0 6 * * 1-4' workflow_dispatch: inputs: container-image: description: 'Container image to pull from DockerHub' required: false tsdb-commit: description: 'TimescaleDB commit to use' default: '' required: false tsdb-repo: description: 'TimescaleDB repo to use' default: 'https://github.com/timescale/timescaledb.git' required: false all-platforms: description: 'Test all platforms' type: boolean default: false jobs: testpostgres: name: Test Postgres runs-on: ${{ contains(matrix.container.image, 'arm64') && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }} container: image: ${{ inputs.container-image || 'timescaledev/toolkit-builder-test' }}:${{ matrix.container.image }} strategy: fail-fast: false max-parallel: 12 matrix: pgversion: [15, 16, 17, 18] container: - os: rockylinux version: "9" image: rockylinux-9-x86_64 schedule: true - os: debian version: "13" image: debian-13-arm64 schedule: true - os: debian version: "13" image: debian-13-amd64 schedule: true - os: debian version: "12" image: debian-12-arm64 schedule: ${{ inputs.all-platforms || ( github.event_name == 'schedule' && github.event.schedule == '0 6 * * 1-4' ) }} - os: debian version: "12" image: debian-12-amd64 schedule: ${{ 
inputs.all-platforms || ( github.event_name == 'schedule' && github.event.schedule == '0 6 * * 1-4' ) }} - os: debian version: "11" image: debian-11-amd64 schedule: ${{ inputs.all-platforms || ( github.event_name == 'schedule' && github.event.schedule == '0 6 * * 1-4' ) }} - os: ubuntu version: "24.04" image: ubuntu-24.04-amd64 schedule: true - os: ubuntu version: "22.04" image: ubuntu-22.04-amd64 schedule: ${{ inputs.all-platforms || ( github.event_name == 'schedule' && github.event.schedule == '0 6 * * 1-4' ) }} exclude: - container: skip: true - container: schedule: false env: # TODO Why? Cargo default is to pass `-C incremental` to rustc; why don't we want that? # https://doc.rust-lang.org/rustc/codegen-options/index.html#incremental # Well turning it off takes the extension target size down from 3G to 2G... CARGO_INCREMENTAL: 0 # TODO Why? If we're concerned about trouble fetching crates, why not # just fetch them once at the time we select a dependency? # Errors fetching crates are probably rare enough that we don't see the # need to bother, but then why not just let the build fail? CARGO_NET_RETRY: 10 # TODO What reads this? It's not listed on # https://doc.rust-lang.org/cargo/reference/environment-variables.html CI: 1 RUST_BACKTRACE: short steps: - name: Checkout Repository uses: actions/checkout@v5 with: ref: ${{ github.event.pull_request.head.sha }} token: ${{ secrets.GITHUB_TOKEN }} # Github Actions provides a bind mounted working directory for us, where # the above checkout happens, and where caches are read from and restored # to, and it's all owned by 1001. Our container image is `USER root` so # we have no problem writing anywhere, but we run some things as user # 'postgres', which used to be user 1000 but is now 1001. Hoping in the # future to make our container image `USER postgres` and further simplify # this file and the packaging Actions file, but it's non-trivial. - name: chown Repository run: | chown -R postgres . 
- name: Build and install TimescaleDB if: ${{ (github.event_name == 'schedule' && github.event.schedule == '0 8 * * 1-4') || inputs.tsdb-commit != '' }} run: ./tools/install-timescaledb '${{ matrix.pgversion }}' '${{ inputs.tsdb-repo || 'https://github.com/timescale/timescaledb.git' }}' '${{ inputs.tsdb-commit == '' && 'main' || matrix.tsdb_commit || inputs.tsdb-commit }}' # TODO After the container image contains a primed target dir, is this still worth it? # Only possible advantage is this one is per-pg-version but what's the impact? - name: Cache cargo target dir uses: actions/cache@v4 if: ${{ matrix.container.image == 'debian-11-amd64' }} with: path: target key: ${{ runner.os }}-test-pg${{ matrix.pgversion }}-target-${{ hashFiles('Cargo.lock', '.github/workflows/ci.yml') }} restore-keys: ${{ runner.os }}-test-pg${{ matrix.pgversion }}-target- # Packages not # - name: Run pgrx tests run: | if [ "${{ matrix.container.version }}" = 7 ]; then # needed for pgrx to find clang set +e # will succeed but have non-zero exit code . 
scl_source enable llvm-toolset-7 set -e fi su postgres -c 'sh tools/build -pg${{ matrix.pgversion }} test-extension 2>&1' - name: Run doc tests # depends on TSDB, which requires PG >=13 if: ${{ matrix.pgversion >= 13 && (matrix.pgversion >= 13 && (matrix.pgversion <= 15 || ((github.event_name == 'schedule' && github.event.schedule == '0 8 * * 1-4') || inputs.tsdb-commit != ''))) }} run: su postgres -c 'sh tools/build -pg${{ matrix.pgversion }} test-doc 2>&1' - name: Run binary update tests (deb) # depends on TSDB, which requires PG >=13 if: ${{ (matrix.container.os == 'debian' || matrix.container.os == 'ubuntu') && (matrix.pgversion >= 13 && (matrix.pgversion <= 16 || ((github.event_name == 'schedule' && github.event.schedule == '0 8 * * 1-4') || inputs.tsdb-commit != ''))) }} run: | su postgres -c 'OS_NAME=${{ matrix.container.os }} OS_VERSION=${{ matrix.container.version }} tools/testbin -version no -bindir / -pgversions ${{ matrix.pgversion }} ci 2>&1' - name: Run binary update tests (EL) if: ${{ (matrix.container.os == 'rockylinux') && matrix.container.version != '9' && (matrix.pgversion >= 13 && (matrix.pgversion <= 16 || ((github.event_name == 'schedule' && github.event.schedule == '0 8 * * 1-4') || inputs.tsdb-commit != ''))) }} run: | su postgres -c 'OS_NAME=${{ matrix.container.os }} OS_VERSION=${{ matrix.container.version }} tools/testbin -version no -bindir / -pgversions ${{ matrix.pgversion }} rpm_ci 2>&1' testcrates: name: Test Crates runs-on: ubuntu-24.04 container: image: ${{ inputs.container-image || 'timescaledev/toolkit-builder' }}:debian-11-amd64 env: CARGO_INCREMENTAL: 0 CARGO_NET_RETRY: 10 CI: 1 RUST_BACKTRACE: short steps: - name: Checkout Repository uses: actions/checkout@v5 with: ref: ${{ github.event.pull_request.head.sha }} - name: chown Repository run: chown -R postgres . 
- name: Cache cargo target dir uses: actions/cache@v4 with: path: target key: ${{ runner.os }}-test-crates-target-${{ hashFiles('Cargo.lock', '.github/workflows/ci.yml') }} restore-keys: ${{ runner.os }}-test-crates-target- - name: Run Crates Tests run: su postgres -c 'sh tools/build test-crates 2>&1' ================================================ FILE: .github/workflows/ci_image_build.yml ================================================ name: Build CI Image on: pull_request: paths: - 'docker/ci/**' - '.github/workflows/ci_image_build.yml' - 'tools/dependencies.sh' workflow_dispatch: inputs: tag-base: description: 'Push image to DockerHub with this base tag (remove "-test" enable)' required: false # Repeating the default here for ease of editing in the github actions form. Keep in sync with below. default: timescaledev/toolkit-builder-test toolkit-commit: description: 'Toolkit commit (branch, tag, etc.) to build image from' required: false default: main builder-commit: description: 'Commit (branch, tag, etc.) on release-build-scripts repository to use' required: false jobs: build: env: GITHUB_TOKEN: ${{ secrets.ORG_AUTOMATION_TOKEN}} runs-on: ubuntu-24.04 steps: - name: Run release-build-scripts job # Repeating the default here for 'pull_request'. Keep in sync with above. 
run: | echo "toolkit-commit: ${{ inputs.toolkit-commit || github.event.pull_request.head.sha }}" echo "builder: ${{ inputs.builder-commit || 'main' }}" echo "tag-base: ${{ inputs.tag-base || 'timescaledev/toolkit-builder-test' }}" gh workflow run toolkit-image.yml \ -R timescale/release-build-scripts \ -r ${{ inputs.builder-commit || 'main' }} \ -f tag-base=${{ inputs.tag-base || 'timescaledev/toolkit-builder-test' }} \ -f toolkit-commit=${{ inputs.toolkit-commit || github.event.pull_request.head.sha }} ================================================ FILE: .github/workflows/clippy_rustfmt.yml ================================================ name: Clippy and rustfmt on: pull_request: push: branches: - main - staging - trying workflow_dispatch: inputs: container-image: description: 'Container image to pull from DockerHub' required: false jobs: clippy: name: Clippy/rustfmt Test runs-on: ubuntu-24.04 container: # Duplicated from ci.yml image: ${{ inputs.container-image || 'timescaledev/toolkit-builder-test:debian-11-amd64' }} env: # TODO: See TODOs on duplicate block in ci.yml CARGO_INCREMENTAL: 0 CARGO_NET_RETRY: 10 CI: 1 RUST_BACKTRACE: short steps: - name: Checkout Repository uses: actions/checkout@v5 with: ref: ${{ github.event.pull_request.head.sha }} - name: chown Repository run: chown -R postgres . - name: Cache cargo target dir uses: actions/cache@v4 with: path: target key: ${{ runner.os }}-clippy-target-${{ hashFiles('Cargo.lock', '.github/workflows/clippy_rustfmt.yml') }} restore-keys: ${{ runner.os }}-clippy-target- - name: Run Clippy # Github captures stdout and stderr separately and then intermingles them # in the wrong order. We don't actually care to distinguish, so redirect # stderr to stdout so we get the proper order. 
run: su postgres -c 'sh tools/build clippy 2>&1' - name: Verify formatting run: su postgres -c 'cargo fmt --check 2>&1' ================================================ FILE: .github/workflows/dependency-updates.yml ================================================ name: Dependency Updates on: schedule: # Run on the 1st of every month at 9:00 AM UTC - cron: '0 9 1 * *' workflow_dispatch: env: CARGO_TERM_COLOR: always jobs: update-dependencies: runs-on: ubuntu-latest steps: - uses: actions/checkout@v5 with: token: ${{ secrets.GITHUB_TOKEN }} - uses: dtolnay/rust-toolchain@stable with: toolchain: stable - name: Cache cargo registry and index uses: actions/cache@v4 with: path: | ~/.cargo/registry/index/ ~/.cargo/registry/cache/ ~/.cargo/git/db/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: | ${{ runner.os }}-cargo- - name: Update dependencies run: | # Update dependencies and capture the output cargo update --verbose > update_output.txt 2>&1 || true # Check if Cargo.lock was modified if git diff --quiet Cargo.lock; then echo "NO_UPDATES=true" >> $GITHUB_ENV echo "No dependency updates available" else echo "NO_UPDATES=false" >> $GITHUB_ENV echo "Dependencies updated, changes detected in Cargo.lock" fi - name: Run cargo check if: env.NO_UPDATES == 'false' run: cargo check --all-targets --verbose - name: Run tests if: env.NO_UPDATES == 'false' run: cargo test --verbose - name: Generate update summary if: env.NO_UPDATES == 'false' run: | echo "## 📦 Monthly Dependency Updates" > pr_body.md echo "" >> pr_body.md echo "This PR contains automated dependency updates for $(date +'%B %Y')." 
>> pr_body.md echo "" >> pr_body.md echo "### Changes:" >> pr_body.md echo "\`\`\`" >> pr_body.md cat update_output.txt >> pr_body.md echo "\`\`\`" >> pr_body.md echo "" >> pr_body.md echo "### Verification:" >> pr_body.md echo "- ✅ \`cargo check\` passed" >> pr_body.md echo "- ✅ \`cargo test\` passed" >> pr_body.md echo "" >> pr_body.md echo "---" >> pr_body.md echo "*This PR was automatically created by the monthly dependency update workflow.*" >> pr_body.md - name: Create Pull Request if: env.NO_UPDATES == 'false' uses: peter-evans/create-pull-request@v6 with: token: ${{ secrets.GITHUB_TOKEN }} commit-message: "chore: update dependencies for $(date +'%B %Y')" title: "chore: Monthly dependency updates - $(date +'%B %Y')" body-path: pr_body.md branch: dependency-updates/$(date +'%Y-%m') delete-branch: true labels: | dependencies automated assignees: ${{ github.repository_owner }} draft: false - name: Summary run: | if [ "$NO_UPDATES" = "true" ]; then echo "✅ No dependency updates needed - all dependencies are up to date!" else echo "✅ Dependency update PR created successfully!" 
fi ================================================ FILE: .github/workflows/packaging.yml ================================================ # Trigger package workflows on release tagging name: Build packages on: push: tags: - "[0-9]+.[0-9]+.[0-9]+" workflow_dispatch: jobs: package: env: GITHUB_TOKEN: ${{ secrets.ORG_AUTOMATION_TOKEN }} runs-on: ubuntu-24.04 steps: - name: Set env run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV - name: Debian and Ubuntu packages if: always() run: | gh workflow run toolkit-apt.yml -R timescale/release-build-scripts -r main -f version=${{ env.RELEASE_VERSION }} -f upload-artifacts=true - name: RPM packages if: always() run: | gh workflow run toolkit-rpm.yml -R timescale/release-build-scripts -r main -f version=${{ env.RELEASE_VERSION }} -f upload-artifacts=true ================================================ FILE: .github/workflows/release.yml ================================================ name: Release on: workflow_dispatch: inputs: version: description: 'New version number for release' required: true commit: description: 'Commit id to branch from (default is HEAD of main)' type: string required: false default: main # TODO Make this harder to screw up by making a checkbox. dry-run: description: '-n for dry-run, -push to really release' type: string required: false default: -n jobs: release: name: Release runs-on: ubuntu-24.04 container: image: timescaledev/toolkit-builder-test:debian-11-amd64 steps: - name: Checkout Repository uses: actions/checkout@v5 with: ref: ${{ inputs.commit }} - name: chown Repository run: chown -R postgres . 
- name: Install dependencies not yet in image run: su postgres -c 'tools/release setup' 2>&1 - name: Run tools/release env: GITHUB_TOKEN: ${{ secrets.ORG_AUTOMATION_TOKEN }} ACTIONS_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: su postgres -c 'tools/release ${{ inputs.dry-run }} -version ${{ inputs.version }} ${{ inputs.commit }}' 2>&1 ================================================ FILE: .github/workflows/report_packaging_failures.yml ================================================ name: Report Build Package Failures on: workflow_run: workflows: [Build packages, Build CI Image, CI] types: [completed] jobs: on-failure: runs-on: ubuntu-24.04 if: ${{ github.event.workflow_run.conclusion != 'success' && github.event.workflow_run.event != 'pull_request' }} steps: - name: slack-send uses: slackapi/slack-github-action@v1.19.0 with: payload: | { "blocks": [ { "type": "section", "text": { "type": "mrkdwn", "text": "Workflow run <${{ github.event.workflow_run.html_url }}|${{ github.event.workflow.name}}#${{ github.event.workflow_run.run_number }}>" } }, { "type": "section", "fields": [ { "type": "mrkdwn", "text": "*Status*\n`${{ github.event.workflow_run.conclusion }}`" }, { "type": "mrkdwn", "text": "*Triggered By*\n<${{ github.event.sender.html_url }}|${{ github.event.sender.login }}>" } ] } ] } env: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_NOTIFY_WEBHOOK_URL }} SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK ================================================ FILE: .gitignore ================================================ .DS_Store /.idea /.vscode /.vsls.json /old-versions /target *.iml /target-analyzer /.editorconfig ================================================ FILE: Cargo.toml ================================================ [workspace] resolver = "2" members = [ "crates/t-digest-lib", "extension", "tools/post-install", "tools/sql-doctester", "tools/update-tester", ] [profile.release] lto = "fat" debug = true codegen-units = 1 ================================================ 
FILE: Changelog.md ================================================ # Toolkit Changelog ## Process for updating this changelog This changelog should be updated as part of a PR if the work is worth noting (most of them should be). If unsure, always add an entry here for any PR targeted for the next release. It's easier to remove than add an entry at final review time for the next release. ## Next Release (Date TBD) #### New experimental features #### Bug fixes #### Other notable changes #### Shout-outs **Full Changelog**: [TODO] ## [1.21.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.21.0) (2025-04-17) #### New experimental features #### Bug fixes #### Other notable changes - [#847](https://github.com/timescale/timescaledb-toolkit/pull/847): Added `total` accessor for tdigest and uddsketch - [#853](https://github.com/timescale/timescaledb-toolkit/pull/853): Performance improvements for `UDDSketch` #### Shout-outs **Full Changelog**: [TODO] ## [1.19.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.19.0) (2024-11-14) #### New experimental features #### Bug fixes #### Other notable changes #### Shout-outs **Full Changelog**: [TODO] ## [1.18.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.18.0) (2023-11-28) #### New experimental features - [#776](https://github.com/timescale/timescaledb-toolkit/pull/776): PostgreSQL 16 support **Full Changelog**: [TODO] ## [1.17.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.17.0) (2023-07-06) #### New experimental features #### Bug fixes - [#761](https://github.com/timescale/timescaledb-toolkit/pull/761): Make sure nmost combine uses correct memctx #### Other notable changes #### Shout-outs **Full Changelog**: [TODO] ## [1.16.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.16.0) (2023-04-05) #### Bug fixes - [#733](https://github.com/timescale/timescaledb-toolkit/pull/733): Fix a bug when rolling up overlapping heartbeat_aggs - 
[#740](https://github.com/timescale/timescaledb-toolkit/pull/740): When interpolating an 'locf' time weighted average, extend last point to interpolation boundary - [#742](https://github.com/timescale/timescaledb-toolkit/pull/742): Ignore incoming NULL values in hyperloglog rollup #### Stabilized features - [#741](https://github.com/timescale/timescaledb-toolkit/pull/741): Stabilize `approx_count_distinct` - [#748](https://github.com/timescale/timescaledb-toolkit/pull/748): Stabilize `approx_percentile_array` - [#745](https://github.com/timescale/timescaledb-toolkit/pull/745): Stabilize date utility functions - [#751](https://github.com/timescale/timescaledb-toolkit/pull/751): Stabilize `min_n`/`max_n`/`min_n_by`/`max_n_by` - [#752](https://github.com/timescale/timescaledb-toolkit/pull/752): Stabilize `mcv_agg`, this was previously our `topn_agg` #### Other notable changes - [#743](https://github.com/timescale/timescaledb-toolkit/pull/743): Remove support for direct upgrades from toolkit versions more than 1 year old. Toolkit versions 1.4.x and 1.5.x will have to upgrade to an intermediate version before upgrading to 1.16.0. - [#744](https://github.com/timescale/timescaledb-toolkit/pull/744): Fix nightly CI failures from building TimescaleDB on Enterprise Linux - [#749](https://github.com/timescale/timescaledb-toolkit/pull/749): Added num_live_ranges, num_gaps, and trim_to accessors for heartbeat aggregates **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.15.0...1.16.0 ## [1.15.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.15.0) (2023-03-08) #### New experimental features #### Bug fixes - [#715](https://github.com/timescale/timescaledb-toolkit/pull/715): Fix out-of-bounds indexing error in `state_agg` rollup #### Stabilized features - [#722](https://github.com/timescale/timescaledb-toolkit/pull/722): Stabilize heartbeat aggregate. 
- [#724](https://github.com/timescale/timescaledb-toolkit/pull/724): Stabilize integral and interpolated_integral for time-weighted-average. - [#723](https://github.com/timescale/timescaledb-toolkit/pull/723): Stabilized `state_agg` #### Other notable changes - [#716](https://github.com/timescale/timescaledb-toolkit/issues/716): Add arrow operator support for counter aggregate and time-weighted aggregate interpolated accessors. - [#716](https://github.com/timescale/timescaledb-toolkit/issues/716): Remove experimental versions of interpolated accessors for counter aggregate and time-weighted aggregates. The stable versions introduced in 1.14.0 should be used instead. - [#723](https://github.com/timescale/timescaledb-toolkit/pull/723): Added `state_at` function for `state_agg` - [#709](https://github.com/timescale/timescaledb-toolkit/pull/709): Updated pgx version to 0.7.1 **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.14.0...1.15.0 ## [1.14.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.14.0) (2023-02-09) #### New experimental features #### Bug fixes - [#660](https://github.com/timescale/timescaledb-toolkit/issues/660): Heartbeat aggregate rollup should interpolate aggregates - [#679](https://github.com/timescale/timescaledb-toolkit/issues/679): Heartbeat agg rollup producing invalid aggregates. #### Stabilized features - [#701](https://github.com/timescale/timescaledb-toolkit/pull/701): Stabilize candlestick. - [#650](https://github.com/timescale/timescaledb-toolkit/pull/650): Stabilize interpolated_delta & interpolated_rate for counter aggregate, and interpolated_average for time-weighted aggregate. 
#### Other notable changes - [#685](https://github.com/timescale/timescaledb-toolkit/issues/685): rollup for freq_agg and topn_agg - [#692](https://github.com/timescale/timescaledb-toolkit/pull/692): Support specifying a range to `duration_in` to specify a time range to get states in for state aggregates - [#692](https://github.com/timescale/timescaledb-toolkit/pull/692): Removed `next` parameter from interpolated state aggregate functions - [#692](https://github.com/timescale/timescaledb-toolkit/pull/692): Renamed `state_agg` to `compact_state_agg` and `timeline_agg` to `state_agg` - [#699](https://github.com/timescale/timescaledb-toolkit/pull/699): `interpolated_duration_in`/`duration_in`/`interpolated_state_periods`/`state_periods` have the first two arguments swapped: now the aggregate is first and the state is second - [#699](https://github.com/timescale/timescaledb-toolkit/pull/699): `into_values`/`into_int_values` now returns a table with intervals instead of microseconds **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.13.1...1.14.0 ## [1.13.1](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.13.1) (2023-01-03) **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.13.0...1.13.1 - [#664](https://github.com/timescale/timescaledb-toolkit/pull/664) Support PostgreSQL 15. ## [1.13.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.13.0) (2022-12-13) #### New experimental features - [#615](https://github.com/timescale/timescaledb-toolkit/pull/615): Heartbeat aggregate Users can use the new `heartbeat_agg(timestamp, start_time, agg_interval, heartbeat_interval)` to track the liveness of a system in the range (`start_time`, `start_time` + `agg_interval`). Each timestamp seen in that range is assumed to indicate system liveness for the following `heartbeat_interval`. 
Once constructed, users can query heartbeat aggregates for `uptime` and `downtime`, as well as query for `live_ranges` or `dead_ranges`. Users can also check for `live_at(timestamp)`. Heartbeat aggregates can also interpolated to better see behavior around the boundaries of the individual aggregates. - [#620](https://github.com/timescale/timescaledb-toolkit/pull/620): Expose TDigest type This is a prototype for building `TDigest` objects client-side, for `INSERT` into tables. This is a lightly tested prototype; try it out at your own risk! [Examples](docs/examples/) - [#635](https://github.com/timescale/timescaledb-toolkit/pull/635): AsOf joins for timevectors This allows users to join two timevectors with the following semantics `timevectorA -> asof(timevectorB)`. This will return records with the LOCF value from timevectorA at the timestamps from timevectorB. Specifically the returned records contain, for each value in timevectorB, {the LOCF value from timevectorA, the value from timevectorB, the timestamp from timevectorB}. - [#609](https://github.com/timescale/timescaledb-toolkit/pull/609): New `approx_percentile_array()` function Users can use the new `toolkit_experimental.approx_percentile_array(percentiles)` to generate an array of percentile results instead of having to call and rebuild the aggregate multiple times. - [#636](https://github.com/timescale/timescaledb-toolkit/pull/636): New `timeline_agg` aggregate, which is similar to `state_agg` but tracks the entire state timeline instead of just the duration in each state. - [#640](https://github.com/timescale/timescaledb-toolkit/pull/640): Support `rollup` for `state_agg` and `timeline_agg`. - [#640](https://github.com/timescale/timescaledb-toolkit/pull/640): Support integer states for `state_agg` and `timeline_agg`. - [#638](https://github.com/timescale/timescaledb-toolkit/pull/638): Introducing Time Vector Templates. 
Users can use the new experimental function `toolkit_experimental.to_text(timevector(time, value),format_string)` to render a formatted text representation of their time vector series. These changes also include `toolkit_experimental.to_plotly(timevector(time, value))`, which will render your time vector series in a format suitable for use with plotly. #### Bug fixes - [#644](https://github.com/timescale/timescaledb-toolkit/pull/644): Fix bug in Candlestick aggregate and reenable partial aggregation. #### Other notable changes - [#646](https://github.com/timescale/timescaledb-toolkit/pull/646): Added experimental support for PostgreSQL 15. - [#621](https://github.com/timescale/timescaledb-toolkit/pull/621): Rocky Linux 9 support **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.12.1...1.13.0 ## [1.12.1](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.12.1) (2022-11-17) #### Bug fixes - [#624](https://github.com/timescale/timescaledb-toolkit/pull/624): Remove partial aggregation for Candlestick aggregates. We've determined that the cause for the bad results lives somewhere in the functions that are used to support partial aggregation. We can at least prevent folks from running the candlestick aggregates in parallel mode and hitting this bug by dropping support for partial aggregation until we've resolved the issue. ## [1.12.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.12.0) (2022-11-08) #### New experimental features - [#596](https://github.com/timescale/timescaledb-toolkit/pull/596): Introduce Candlestick Aggregate. Users can use either the `toolkit_experimental.candlestick_agg(timestamp, price, volume)` aggregate or the `toolkit_experimental.candlestick(timestamp, open, high, low, close, volume)` function, depending on whether they are starting from tick data or already aggregated data. 
Both the aggregate form and the function form of `Candlestick` support the following (experimental) accessors (in addition to being re-aggregated via `rollup`): `open`, `high`, `low`, `close`, `open_time`, `high_time`, `low_time`, `close_time`, `volume`, `vwap` (Volume Weighted Average Price) *NOTE*: This functionality improves upon and replaces the need for `toolkit_experimental.ohlc` which will be removed in the next release. - [#590](https://github.com/timescale/timescaledb-toolkit/pull/590): New `min_n`/`max_n` functions and related `min_n_by`/`max_n_by`. The former is used to get the top N values from a column while the later will also track some additional data, such as another column or even the entire row. These should give the same results as a `SELECT ... ORDER BY ... LIMIT n`, except they can be composed and combined like other toolkit aggregates. #### Bug fixes - [#568](https://github.com/timescale/timescaledb-toolkit/pull/568): Allow `approx_count` accessor function to take NULL inputs. - [#574](https://github.com/timescale/timescaledb-toolkit/pull/574): Add default unit to interpolated_integral. #### Other notable changes - RPM packages for CentOS 7 have returned. - New Homebrew formula available for macOS installation: `brew install timescale/tap/timescaledb-toolkit`. - [#547](https://github.com/timescale/timescaledb-toolkit/pull/547): Update pgx to 0.5.0. This is necessary for adding Postgres 15 support coming soon. - [#571](https://github.com/timescale/timescaledb-toolkit/pull/571): Update CI docker image for pgx 0.5.0. - [#599](https://github.com/timescale/timescaledb-toolkit/pull/599): Reduce floating point error when using `stats_agg` in moving aggregate mode. - [#589](https://github.com/timescale/timescaledb-toolkit/pull/589): Update pgx to 0.5.4. - [#594](https://github.com/timescale/timescaledb-toolkit/pull/594): Verify that pgx doesn't generate CREATE OR REPLACE FUNCTION. 
- [#592](https://github.com/timescale/timescaledb-toolkit/pull/592): Add build script option to install in release mode. #### Shout-outs - @zyro for reporting null handling issue on `count_min_sketch`. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.11.0...1.12.0 ## [1.11.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.11.0) (2022-09-21) #### New experimental features - arm64/aarch64 DEB packages are now available for Ubuntu 20.04 (focal) & 22.04 (jammy), and Debian 10 (buster) & 11 (bulleye). - [#526](https://github.com/timescale/timescaledb-toolkit/pull/526): Add `integral` and `interpolated_integral` functions for the time_weight aggregate. Makes `trapezoidal` an alias for `linear` in `time_weight` as it might be a more familiar numeric integral method for some. - [#517](https://github.com/timescale/timescaledb-toolkit/pull/517): Add a gap preserving `lttb` named `gp_lttb` to handle downsampling of data with large gaps. - [#513](https://github.com/timescale/timescaledb-toolkit/pull/513): Add `first_val`, `last_val`, `first_time` and `last_time` to `time_weight` and `counter_agg` to access the first and the last data points within the aggregate data structures. - [#527](https://github.com/timescale/timescaledb-toolkit/pull/527): Rename `{open, high, low, close}_at` to `{open, high, low, close}_time` to be consistent with newly added `first_time` and `last_time` accessor functions. #### Stabilized features - [#498](https://github.com/timescale/timescaledb-toolkit/pull/498): Stabilize `asap_smooth` aggregate. #### Bug fixes - [#509](https://github.com/timescale/timescaledb-toolkit/pull/509), [#531](https://github.com/timescale/timescaledb-toolkit/pull/531): Fix bugs in`hyperloglog`. Error rates are now significantly more consistent when the number of buckets are close to the actual cardinality. 
- [#514](https://github.com/timescale/timescaledb-toolkit/pull/514): Fix a bug in `toolkit_experimental.interpolated_delta`. - [#503](https://github.com/timescale/timescaledb-toolkit/pull/503): Fix bitwise logic in timevector combine. - [#507](https://github.com/timescale/timescaledb-toolkit/pull/507): Fix a typo in `approx_count_distinct`. #### Other notable changes - DEB packages for Ubuntu 18.04 (Bionic) on amd64 are now available. - [#536](https://github.com/timescale/timescaledb-toolkit/pull/536): Document equirement to use same compiler for cargo-pgx and Toolkit. - [#535](https://github.com/timescale/timescaledb-toolkit/pull/535): Make tests pass in Canadian locales. - [#537](https://github.com/timescale/timescaledb-toolkit/pull/537): Enforce `cargo fmt` in CI. - [#524](https://github.com/timescale/timescaledb-toolkit/pull/524): Updating Toolkit To Start Using Cargo Fmt. - [#522](https://github.com/timescale/timescaledb-toolkit/pull/522): Move update-tester tests to markdown files. #### Shout-outs - @BenSandeen for fixing typos and errors in the hyperloglog++ implementation. - @jaskij for reporting security advisories and suggestion on documenting support for PG 14. - @jeremyhaberman for fixing a typo in `APPROX_COUNT_DISTINCT_DEFAULT_SIZE`. - @jledentu for reporting an error on `interpolated_delta`. - @stevedrip for a very detailed bug report on hyperloglog++ and suggestions for fixing it. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.10.1...1.11.0 ## [1.10.1](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.10.1) (2022-08-18) #### New experimental features - [#490](https://github.com/timescale/timescaledb-toolkit/pull/490): Month normalization function `month_normalize` and the helper function `days_in_month`, useful for normalizing data to a fixed month length for more meaningful month-to-month comparison. 
- [#496](https://github.com/timescale/timescaledb-toolkit/pull/496): `OHLC` aggregate, and the associated `rollup` and accessor functions `open`, `high`, `low`, `close`, `{open, high, low, close}_at` mainly for trading data. #### Stabilized features - [#495](https://github.com/timescale/timescaledb-toolkit/pull/495): `LTTB` downsampling function. - [#491](https://github.com/timescale/timescaledb-toolkit/pull/491), [#488](https://github.com/timescale/timescaledb-toolkit/pull/488): The arrow operators (->) of the accessor functions for `stats_agg`, `percentile_agg`, `counter_agg`, `gauge_agg` and `hyperloglog`. As an example, `average` accessor can now be used with `stats_agg` like this, ```SQL select location, stats_agg(temperature) -> average() AS avg_temperature from conditions group by location ``` #### Bug fixes - [#465](https://github.com/timescale/timescaledb-toolkit/pull/465): Off by one error in state_agg interpolate. #### Other notable changes - Fix an issue where the 1.9.0 release unintentionally identified the toolkit extension version as 1.10.0-dev in the postgresql control file. - [#467](https://github.com/timescale/timescaledb-toolkit/pull/467): Document supported platforms in Readme. - [#463](https://github.com/timescale/timescaledb-toolkit/pull/463): Use pg14 as an example for instructions in instead of pg13. Add reference to deb and rpm packages. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.8.0...1.10.1 ## [1.9.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.9.0) (2022-08-16) **An incorrect version (1.10.0-dev) was used which can cause upgrade failures. 
Not made GA.** ## [1.8.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.8.0) (2022-07-05) #### New experimental features - [#454](https://github.com/timescale/timescaledb-toolkit/pull/454): Saturating Math for i32/integers: - `saturating_add` - `saturating_add_pos` - `saturating_sub` - `saturating_sub_pos` - `saturating_mul` - [#456](https://github.com/timescale/timescaledb-toolkit/pull/456): Adding interpolating accessors: - `interpolated_duration_in` to `state_agg`, - `interpolated_average` to `time_weight`, `interpolated_delta` - `interpolated_rate` to `counter_agg` and `gauge_agg`. - [#388](https://github.com/timescale/timescaledb-toolkit/pull/388): Create Count-Min Sketch crate. - [#459](https://github.com/timescale/timescaledb-toolkit/pull/459): Add a convenient `approx_count_distinct` function which internally uses hyperloglog with a default bucket size of 2^15. - [#458](https://github.com/timescale/timescaledb-toolkit/pull/458): Add `count_min_sketch` aggregate and `approx_count` accessor. - [#434](https://github.com/timescale/timescaledb-toolkit/pull/434): Initial changes to support aarch64-unknown-linux-gnu. #### Bug fixes - [#429](https://github.com/timescale/timescaledb-toolkit/pull/429): Support explicit NULL values in timevectors. - [#441](https://github.com/timescale/timescaledb-toolkit/pull/441): Relax tolerance in UDDSketch merge assertions. - [#444](https://github.com/timescale/timescaledb-toolkit/pull/444): Fix default collation deserialization. #### Other notable changes - [#451](https://github.com/timescale/timescaledb-toolkit/pull/451): Improve error message for HyperLogLog. - [#417](https://github.com/timescale/timescaledb-toolkit/pull/417): Include both pgx 0.2.x and pgx 0.4.x in CI image. - [#416](https://github.com/timescale/timescaledb-toolkit/pull/416): Prepare for the 1.8.0 cycle. - [#418](https://github.com/timescale/timescaledb-toolkit/pull/418): Made update-tester require two versions of cargo-pgx. 
- [#421](https://github.com/timescale/timescaledb-toolkit/pull/421): Don't install pgx as root or under "/". - [#427](https://github.com/timescale/timescaledb-toolkit/pull/427): Fix failing update-tester in CI. - [#428](https://github.com/timescale/timescaledb-toolkit/pull/428): Update github cache keys. - [#430](https://github.com/timescale/timescaledb-toolkit/pull/430): Lock pgx versions all the way. - [#408](https://github.com/timescale/timescaledb-toolkit/pull/408): Upgrade to pgx 0.4.5. - [#436](https://github.com/timescale/timescaledb-toolkit/pull/436): Change which cargo-pgx subcommand is added to PATH in CI image. - [#432](https://github.com/timescale/timescaledb-toolkit/pull/432): Remove PATH hack in tools/build script. - [#437](https://github.com/timescale/timescaledb-toolkit/pull/437): GitHub Actions improvements. - [#448](https://github.com/timescale/timescaledb-toolkit/pull/448): Run clippy GitHub Actions job without qualification. - [#446](https://github.com/timescale/timescaledb-toolkit/pull/446): Update README.md. - [#414](https://github.com/timescale/timescaledb-toolkit/pull/414): Specify Ubuntu 20.04 instead of 'latest' in github configuration. #### Shout-outs - @tyhoff for reporting UDDSketch assertion error [#396](https://github.com/timescale/timescaledb-toolkit/issues/396). - @hardikm10 for reporting hyperloglog deserialization issue [#443](https://github.com/timescale/timescaledb-toolkit/issues/443). **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.7.0...1.8.0 ## [1.7.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.7.0) (2022-05-10) #### New experimental features - [#389](https://github.com/timescale/timescaledb-toolkit/pull/389): Create typed specialization for `freq_agg` and `topn_agg`. #### Bug fixes - [#378](https://github.com/timescale/timescaledb-toolkit/pull/378): Return INTERVAL from `duration_in(TEXT, StateAgg)` instead of `i64`. 
- [#379](https://github.com/timescale/timescaledb-toolkit/pull/379): Handle NULL output from our aggregates: `asap`, `counter_agg`, `freq_agg`, `gauge_agg`, `hyperloglog`, `lttb`, `stats_agg`, `tdigest`, `uddsketch`. #### Other notable changes - [#367](https://github.com/timescale/timescaledb-toolkit/pull/367): Switch stabilization tests to new info, meaning that there's one central location for stabilization info. - [#372](https://github.com/timescale/timescaledb-toolkit/pull/372): Improve tools/build flexibility for local builds. - [#394](https://github.com/timescale/timescaledb-toolkit/pull/394): Copy almost all the counter_agg functions for gauge_agg. - [#395](https://github.com/timescale/timescaledb-toolkit/pull/395): Remove GUC as they are no longer needed. - [#399](https://github.com/timescale/timescaledb-toolkit/pull/399): Allow manual packaging. - [#405](https://github.com/timescale/timescaledb-toolkit/pull/405): Update CI to rust 1.60. - [#407](https://github.com/timescale/timescaledb-toolkit/pull/407): Update postgres versions in ci Dockerfile. - [#409](https://github.com/timescale/timescaledb-toolkit/pull/409): Make dependencies version explicit in our CI image. - [#404](https://github.com/timescale/timescaledb-toolkit/pull/404): Refactor TimeVector to greatly simplify structure. - [#412](https://github.com/timescale/timescaledb-toolkit/pull/412): Allow building CI image in Actions. - [#411](https://github.com/timescale/timescaledb-toolkit/pull/411), [#413](https://github.com/timescale/timescaledb-toolkit/pull/413): Create reportpackagingfailures.yml for reporting packaging failures not from CI builds. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.6.0...1.7.0 ## [1.6.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.6.0) (2022-03-29) #### New experimental features - [#330](https://github.com/timescale/timescaledb-toolkit/pull/330): Add serialization for FrequencyTransState. 
- [#368](https://github.com/timescale/timescaledb-toolkit/pull/368): Add `into_values` function for `state_agg`. - [#370](https://github.com/timescale/timescaledb-toolkit/pull/370): Add a `topn (topn_agg)` variant of `freq_agg`, which is more convenient to use. - [#375](https://github.com/timescale/timescaledb-toolkit/pull/375): Add `gauge_agg` and associated accessor functions `delta`, `idelta_left`, `idelta_right`, and the `rollup` function. #### Other notable changes - [#332](https://github.com/timescale/timescaledb-toolkit/pull/332): Speed up builds by fixing github action cache and cargo build cache. - [#377](https://github.com/timescale/timescaledb-toolkit/pull/377): Stop auto building _nightly_ image. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.5.2...1.6.0 ## [1.5.2](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.5.2) (2022-03-07) **HIGH PRIORITY SECURITY UPDATE**. #### Bug fixes - There's a vulnerability in Toolkit 1.5 and earlier due to the fact that it creates a PLPGSQL function using CREATE OR REPLACE and without properly locking down the search path. This means that a user could pre-create the trigger function to run arbitrary code. To fix this we remove the trigger entirely; it no longer pulls its weight. This fix locks down our update scripts to only use CREATE OR REPLACE when actually necessary; while we don't yet have an exploit for the other functions, it would be unsurprising if one exists. - [#351](https://github.com/timescale/timescaledb-toolkit/pull/351): Make serialize functions strict to handle NULL values in partitioned aggregates. #### Shout-outs - @svenklemm for reporting the vulnerability. 
**Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.5.0...1.5.2 ## [1.5.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.5.0) (2022-01-31) **The first version that unifies the community build with Timescale Cloud build.** #### New experimental features - `freq_agg` for estimating the most common elements in a column. - `state_agg` for measuring the total time spent in different states. #### Other notable changes - Enforce clippy linting. - Update rust to 1.57. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.4.0...1.5.0 ## [1.4.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.4.0), [1.4.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.4.0-cloud) (2021-11-17) #### Stabilized features - Postgres 14 support. #### Other notable changes - Upgrade pgx to 0.2. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.3.1...1.4.0-cloud ## [1.3.1](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.3.1), [1.3.1-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.3.1-cloud) (2021-10-27) #### Stabilized features - Postgres 14 support. #### Other notable changes - Upgrade pgx to 0.2. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.3.0...1.3.1-cloud ## [1.3.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.3.0), [1.3.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.3.0-cloud) (2021-10-18) #### New experimental features - `timevector` function pipelines: a compact and more readable way to perform a sequence of analytic operations such as the following one, ``` timevector(ts, val) -> sort() -> delta() -> abs() -> sum() ``` - `->` accessor for Toolkit types enables syntax like `stats_agg(data) -> average()`. - `to_epoch()` wrapper for `extract ('EPOCH' FROM timestamp)` that makes it work more like an inverse of `to_timestamp(DOUBLE PRECISION)`. 
#### Stabilized features - `counter_agg` helper functions for Prometheus-style resetting monotonic counters. - `hyperloglog` efficient approximate COUNT DISTINCT. - `stats_agg` two-step aggregate for common statistics. #### Other notable changes - This release changes the textual I/O format for Toolkit types. We are uncertain if we will need to do so again in the future. Due to this we currently only support dump/restore within a single version of the extension. #### Shout-outs - @jonatas for the contribution [#237](https://github.com/timescale/timescaledb-toolkit/pull/237). - @burmecia for the contribution [#251](https://github.com/timescale/timescaledb-toolkit/pull/251). **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.2.0...1.3.0-cloud ## [1.2.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.2.0), [1.2.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.2.0-cloud) (2021-09-14) #### New experimental features - Refinements to `hyperloglog` including a function to report relative error and fixing the functionality of `rollup`. - Introduction of a `topn` approximation API. Presently this will only work for integer data, but expect to see further refinements that greatly expand this behavior. - New `map_series` and `map_data` pipeline elements for the time series API that allow uses to provide custom transforms of their time series data. Additionally introduced a `|>>` pipeline operator for an even more streamlined interface into the new mapping functionality. #### Bug fixes - Make a pass through all toolkit functions to correctly label behavior as immutable and parallel safe. This should improve the optimizations Postgres can apply to toolkit plans, particularly when run in a Timescale multinode cluster. - Improve handling of internal data structures to reduce extraneous copies of data. 
**Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.1.0...1.2.0-cloud ## [1.1.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.1.0), [1.1.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.1.0-cloud) (2021-08-04) #### New experimental features - `hyperloglog` has been updated to use Hyperloglog++ under the hood. This does not change the user-facing API but should improve the accuracy of hyperloglog() estimates. This is the last major change expected for hyperloglog() and is now a candidate for stabilization pending user feedback. - We've started experimenting with the pipeline API. While it's still very much a work in progress, it's at a point where the high-level concepts should be understandable. For example, a pipeline that outputs the daily change of a set of data, interpolating away any gaps in daily data, could look like ``` SELECT timeseries(time, val) |> sort() |> resample_to_rate('trailing_average', '24 hours', true) |> fill_holes('interpolate') |> delta() FROM ... ``` It's still early days for this API and it is not yet polished, but we would love feedback about its direction. #### Bug fixes - Fix a small memory leak in aggregation functions. This could have leaked ≈8 bytes per aggregate call. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/1.0.0...1.1.0-cloud ## [1.0.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.0.0), [1.0.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/1.0.0-cloud) (2021-07-12) **This release renames the extension to `TimescaleDB Toolkit` from Timescale Analytics and starts stabilizing functionality.** #### New experimental features - `stats_agg()` eases the analysis of more sophisticated bucketed statistics, such as rolling averages. (Docs are forthcoming, until then fell free to peruse the design discussion doc. 
- `timeseries` which will serve as a building block for many pipelines, and unifies the output of lttb and ASAP. #### Stabilized features - Percentile-approximation algorithms including `percentile_agg()`, `uddsketch()` and `tdigest()` along with their associated functions. These are especially useful for computing percentiles in continuous aggregates. - [Time-weighted average](https://github.com/timescale/timescaledb-toolkit/blob/main/docs/time_weighted_average.md) along with its associated functions. This eases taking the average over an irregularly spaced dataset that only includes changepoints. #### Other notable changes - The on-disk layout `uddsketch` has be reworked to store buckets compressed. This can result in an orders-of-magnitude reduction in it's storage requirements. - The textual format `uddsketch` has been reworked to be more readable. - Functions that take in a `uddsketch` or `tdigest` have been reworked to be 0-copy when applicable, improving the performance of such functions by 10-100x. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/0.3.0...1.0.0-cloud ## [0.3.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/0.3.0), [0.3.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/0.3.0-cloud) (2021-06-17) #### Other notable changes - Internal improvements. - Largely prep work for the upcoming 1.0 release. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/0.2.0...0.3.0-cloud ## [0.2.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/0.2.0) (2021-04-08), [0.2.0-cloud](https://github.com/timescale/timescaledb-toolkit/releases/tag/0.2.0-cloud) (2021-04-29) #### New experimental features - ASAP Smoothing (`asap_smooth`) – A graph smoothing algorithm that highlights changes. - Counter Aggregates (`counter_agg`) – Tools to ease working with reset-able counters. 
- Largest Triangle Three Buckets (`lttb`) – A downsampling algorithm that tries to preserve visual similarity. - Time Bucket Range – A version of `time_bucket()` that outputs the [start, end) times of the bucket. - Update `UddSketch` with an aggregate that merges multiple `UddSketchs` and various internal improvements. **Full Changelog**: https://github.com/timescale/timescaledb-toolkit/compare/0.1.0...0.2.0-cloud ## [0.1.0](https://github.com/timescale/timescaledb-toolkit/releases/tag/0.1.0) (2021-03-03) #### New experimental features - `hyperloglog` – An approximate COUNT DISTINCT based on hashing that provides reasonable accuracy in constant space. - `tdigest` – A quantile estimate sketch optimized to provide more accurate estimates near the tails (i.e. 0.001 or 0.995) than conventional approaches. - `uddsketch` – A quantile estimate sketch which provides a guaranteed maximum relative error. - Time-weighted average (`time_weight`) – A time-weighted averaging function to determine the value of things proportionate to the time they are set. #### Stabilized features - None. All features are experimental. ================================================ FILE: LICENSE ================================================ Unless otherwise Source code in this repository, and any binaries built from this source code, in whole or in part, are licensed under the Timescale License (the "License"). You may not use these files except in compliance with the License. You may obtain a copy of the License at https://github.com/timescale/timescaledb/blob/master/tsl/LICENSE-TIMESCALE ================================================ FILE: NOTICE ================================================ TimescaleDB-Toolkit (TM) Copyright (c) 2021-2024 Timescale, Inc. All Rights Reserved. Unless otherwise stated, source code in this repository, and any binaries built from this source code, in whole or in part, are licensed under the Timescale License (the "License"). 
You may not use these files except in compliance with the License. You may obtain a copy of the License at https://github.com/timescale/timescaledb/blob/master/tsl/LICENSE-TIMESCALE ================================================ FILE: Readme.md ================================================ [![CI](https://github.com/timescale/timescaledb-toolkit/actions/workflows/ci.yml/badge.svg?branch=main)](https://github.com/timescale/timescaledb-toolkit/actions/workflows/ci.yml) # TimescaleDB Toolkit This repository is the home of the TimescaleDB Toolkit team. Our mission is to ease all things analytics when using TimescaleDB, with a particular focus on developer ergonomics and performance. Our issue tracker contains more on [the features we're planning to work on](https://github.com/timescale/timescaledb-toolkit/labels/proposed-feature) and [the problems we're trying to solve](https://github.com/timescale/timescaledb-toolkit/labels/feature-request). Documentation for this version of the TimescaleDB Toolkit extension can be found in this repository at [`docs`](https://github.com/timescale/timescaledb-toolkit/tree/main/docs). The release history can be found on this repo's [GitHub releases](https://github.com/timescale/timescaledb-toolkit/releases). ## 🖥 Try It Out The extension comes pre-installed on all [Tiger Cloud](https://www.tigerdata.com/cloud) instances and also on our full-featured [`timescale/timescaledb-ha` docker image](https://hub.docker.com/r/timescale/timescaledb-ha). If DEB and RPM packages are a better fit for your situation, refer to the [Install Toolkit on self-hosted TimescaleDB](https://docs.timescale.com/self-hosted/latest/tooling/install-toolkit/#install-toolkit-on-self-hosted-timescaledb) how-to guide for further instructions on installing the extension via your package manager. All versions of the extension contain experimental features in the `toolkit_experimental` schema. 
See [our docs section on experimental features](/docs/README.md#tag-notes) for more details. ## 💿 Installing From Source ### Supported platforms The engineering team regularly tests the extension on the following platforms: - x86_64-unknown-linux-gnu (Ubuntu Linux 24.04) (tested prior to every merge) - aarch64-unknown-linux-gnu (Ubuntu Linux 24.04) (tested at release time) - x86_64-apple-darwin (MacOS 12) (tested frequently on eng workstation) - aarch64-apple-darwin (MacOS 12) (tested frequently on eng workstation) As for other platforms: patches welcome! ### 🔧 Tools Setup Building the extension requires valid [rust](https://www.rust-lang.org/) (we build and test on 1.65), [rustfmt](https://github.com/rust-lang/rustfmt), and clang installs, along with the postgres headers for whichever version of postgres you are running, and pgrx. We recommend installing rust using the [official instructions](https://www.rust-lang.org/tools/install): ```bash curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh ``` and installing the build tools and the postgres headers in the preferred manner for your system. You may also need to install OpenSSL. For Ubuntu you can follow the [postgres install instructions](https://www.postgresql.org/download/linux/ubuntu/) then run ```bash sudo apt-get install make gcc pkg-config clang postgresql-server-dev-14 libssl-dev ``` Next you need [cargo-pgrx](https://github.com/tcdi/pgrx), which can be installed with ```bash cargo install --version '=0.16.1' --force cargo-pgrx ``` You must reinstall cargo-pgrx whenever you update your Rust compiler, since cargo-pgrx needs to be built with the same compiler as Toolkit. Finally, set up the pgrx development environment with ```bash cargo pgrx init --pg14 pg_config ``` Installing from source is also available on macOS and requires the same set of prerequisites and set up commands listed above. 
### 💾 Building and Installing the extension Download or clone this repository, and switch to the `extension` subdirectory, e.g. ```bash git clone https://github.com/timescale/timescaledb-toolkit && \ cd timescaledb-toolkit/extension ``` Then run ``` cargo pgrx install --release && \ cargo run --manifest-path ../tools/post-install/Cargo.toml -- pg_config ``` To initialize the extension after installation, enter the following into `psql`: ``` CREATE EXTENSION timescaledb_toolkit; ``` ## ✏️ Get Involved We appreciate your help in shaping the project's direction! Have a look at the [list of features we're thinking of working on](https://github.com/timescale/timescaledb-toolkit/labels/proposed-feature) and feel free to comment on the features or expand the list. ### 🔨 Testing See above for prerequisites and installation instructions. You can run tests against a postgres version `pg15`, `pg16`, `pg17`, or `pg18` using ``` cargo pgrx test ${postgres_version} ``` ## Learn about Tiger Data Tiger Data is the fastest PostgreSQL for transactional, analytical, and agentic workloads. To learn more about the company and its products, visit [tigerdata.com](https://www.tigerdata.com). ================================================ FILE: crates/aggregate_builder/Cargo.toml ================================================ [package] name = "aggregate_builder" version = "0.1.0" edition = "2018" [lib] proc-macro = true [dependencies] syn = {version="1.0", features=["extra-traits", "visit", "visit-mut", "full"]} quote = "1.0" proc-macro2 = "1.0" [features] print-generated = [] ================================================ FILE: crates/aggregate_builder/Readme.md ================================================ # Aggregate Builder # Library for building Postgres [aggregate functions](https://www.postgresql.org/docs/current/xaggr.html) that imitates [`CREATE AGGREGATE`](https://www.postgresql.org/docs/current/sql-createaggregate.html). 
## Syntax ## Current syntax looks something like ```rust #[aggregate] impl aggregate_name { type State = InternalTransitionType; fn transition( state: Option, #[sql_type("sql_type")] argument: RustType, // can have an arbitrary number of args ) -> Option { // transition function body goes here } fn finally(state: Option<&mut State>) -> Option { // final function body goes here } // the remaining items are optional // parallel-safety marker if desirable const PARALLEL_SAFE: bool = true; fn serialize(state: &State) -> bytea { // serialize function body goes here } fn deserialize(bytes: bytea) -> State { // deserialize function body goes here } fn combine(state1: Option<&State>, state2: Option<&State>) -> Option { // combine function body goes here } } ``` All items except for `type State`, `fn transition()`, and `fn finally()` are optional. The SQL for the aggregate and its functions will be created automatically, and any necessary memory context switching is handled automatically for most cases¹. ¹It will switch to the aggregate memory context before calling the transition function body and the combine function body. Looking through `array_agg()`'s code these seem to be the correct places to do so. Note that if you want to allocate in the aggregate memory context in the final function other work may be needed. ## Example ## Below is a complete example of an `anything()` aggregate that returns one of the aggregated values. 
```rust #[aggregate] impl anything { type State = String; fn transition( state: Option, #[sql_type("text")] value: String, ) -> Option { state.or(Some(value)) } fn finally(state: Option<&State>) -> Option { state.as_deref().cloned() } } ``` ## Expansion ## Ignoring some supplementary type checking we add to improve error messages, the macro expands aggregate definitions to rust code something like the following (explanations as comments in-line) ```rust // we nest things within a module to mimic the namespacing of an `impl` block pub mod aggregate_name { // glob import to further act like an `impl` use super::*; pub type State = String; // PARALLEL_SAFE constant in case someone wants to use it // unlikely to be actually used in practice #[allow(dead_code)] pub const PARALLEL_SAFE: bool = true; #[pgrx::pg_extern(immutable, parallel_safe)] pub fn aggregate_name_transition_fn_outer( __inner: pgrx::Internal, value: RustType, __fcinfo: pg_sys::FunctionCallInfo, ) -> Option { use crate::palloc::{Inner, InternalAsValue, ToInternal}; unsafe { // Translate from the SQL type to the rust one // we actually store an `Option` rather than a `State`. let mut __inner: Option>> = __inner.to_inner(); // We steal the state out from under the pointer leaving `None` in // its place. This means that if the inner transition function // panics the inner transition function will free `State` while the // teardown hook in the aggregate memory context will only free inner let inner: Option = match &mut __inner { None => None, Some(inner) => Option::take(&mut **inner), }; let state: Option = inner; // Switch to the aggregate memory context. This ensures that the // transition state lives for as long as the aggregate, and that if // we allocate from Postgres within the inner transition function // those too will stay around. 
crate::aggregate_utils::in_aggregate_context(__fcinfo, || { // call the inner transition function let result = transition(state, value); // return the state to postgres, if we have a pointer just store // in that, if not allocate one only if needed. let state: Option = result; __inner = match (__inner, state) { (None, None) => None, (None, state @ Some(..)) => Some(state.into()), (Some(mut inner), state) => { *inner = state; Some(inner) } }; __inner.internal() }) } } pub fn transition(state: Option, value: String) -> Option { // elided } #[pgrx::pg_extern(immutable, parallel_safe)] pub fn aggregate_name_finally_fn_outer( __internal: pgrx::Internal, __fcinfo: pg_sys::FunctionCallInfo, ) -> Option { use crate::palloc::InternalAsValue; unsafe { // Convert to the rust transition type, see the comment in the // transition function for why we store an `Option` let mut input: Option>> = __internal.to_inner(); let input: Option<&mut State> = input.as_deref_mut() .map(|i| i.as_mut()) .flatten(); // We pass in an `Option<&mut State>`; `Option<>` because the // transition state might not have been initialized yet; // `&mut State` since while the final function has unique access to // the transition function it must leave it a valid state when it's // finished let state: Option<&mut State> = input; finally(state) } } pub fn finally(state: Option<&mut State>) -> Option { // elided } #[pgrx::pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] pub fn aggregate_name_serialize_fn_outer(__internal: pgrx::Internal) -> bytea { use crate::palloc::{Inner, InternalAsValue}; // Convert to the rust transition type, see the comment in the // transition function for why we store an `Option` let input: Option>> = unsafe { __internal.to_inner() }; let mut input: Inner> = input.unwrap(); // We pass by-reference for the same reason as the final function. 
// Note that _technically_ you should not mutate in the serialize, // function though there are cases you can get away with it when using // an `internal` transition type. let input: &mut State = input.as_mut().unwrap(); let state: &State = input; serialize(state) } pub fn serialize(state: &State) -> bytea { // elided } #[pgrx::pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] pub fn aggregate_name_deserialize_fn_outer( bytes: crate::raw::bytea, _internal: Internal, ) -> Option { use crate::palloc::ToInternal; let result = deserialize(bytes); let state: State = result; // Convert to the rust transition type, see the comment in the // transition function for why we store an `Option`. // We deliberately don't switch to the aggregate transition context // because the postgres aggregates do not do so. let state: Inner> = Some(state).into(); unsafe { Some(state).internal() } } pub fn deserialize(bytes: crate::raw::bytea) -> State { // elided } #[pgrx::pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn aggregate_name_combine_fn_outer( a: Internal, b: Internal, __fcinfo: pg_sys::FunctionCallInfo, ) -> Option { use crate::palloc::{Inner, InternalAsValue, ToInternal}; unsafe { // Switch to the aggregate memory context. This ensures that the // transition state lives for as long as the aggregate, and that if // we allocate from Postgres within the inner transition function // those too will stay around. crate::aggregate_utils::in_aggregate_context(__fcinfo, || { let result = combine(a.to_inner().as_deref(), b.to_inner().as_deref()); let state: Option = result; let state = match state { None => None, state @ Some(..) 
=> { let state: Inner> = state.into(); Some(state) } }; state.internal() }) } } pub fn combine(a: Option<&State>, b: Option<&State>) -> Option { // elided } // SQL generated for the aggregate pgrx::extension_sql!("\n\ CREATE AGGREGATE toolkit_experimental.aggregate_name (value RustType) (\n\ stype = internal,\n\ sfunc = toolkit_experimental.aggregate_name_transition_fn_outer,\n\ finalfunc = toolkit_experimental.aggregate_name_finally_fn_outer,\n\ parallel = safe,\n serialfunc = toolkit_experimental.aggregate_name_serialize_fn_outer,\n\ deserialfunc = toolkit_experimental.aggregate_name_deserialize_fn_outer,\n\ combinefunc = toolkit_experimental.aggregate_name_combine_fn_outer\n\ );\n", name = "aggregate_name_extension_sql", requires = [ aggregate_name_transition_fn_outer, aggregate_name_finally_fn_outer, aggregate_name_serialize_fn_outer, aggregate_name_deserialize_fn_outer, aggregate_name_combine_fn_outer, ], ); } ``` ================================================ FILE: crates/aggregate_builder/src/lib.rs ================================================ use std::borrow::Cow; use proc_macro::TokenStream; use proc_macro2::{Span, TokenStream as TokenStream2}; use quote::{quote, quote_spanned}; use syn::{ parse::{Parse, ParseStream}, parse_macro_input, parse_quote, punctuated::Punctuated, spanned::Spanned, token::Comma, Token, }; #[proc_macro_attribute] pub fn aggregate(_attr: TokenStream, item: TokenStream) -> TokenStream { // Parse the input tokens into a syntax tree let input = parse_macro_input!(item as Aggregate); let expanded = expand(input); if cfg!(feature = "print-generated") { println!("{expanded}"); } expanded.into() } // // Parser // // like ItemImpl except that we allow `name: Type "SqlType"` for `fn transition` struct Aggregate { schema: Option, name: syn::Ident, state_ty: AggregateTy, parallel_safe: Option, transition_fn: AggregateFn, final_fn: AggregateFn, serialize_fn: Option, deserialize_fn: Option, combine_fn: Option, } enum AggregateItem { 
// The three kinds of item allowed inside an `#[aggregate] impl` block
// (variants of `AggregateItem`; the enum header is immediately above).
    State(AggregateTy),
    Fn(AggregateFn),
    ParallelSafe(AggregateParallelSafe),
}

/// A `type State = ...;` item: the aggregate's transition-state type.
struct AggregateTy {
    ident: syn::Ident,
    ty: Box<syn::Type>,
}

/// A `const PARALLEL_SAFE: bool = ...;` item.
struct AggregateParallelSafe {
    value: syn::LitBool,
}

/// One of the aggregate's functions (`transition`, `finally`, `serialize`,
/// `deserialize`, or `combine`).
struct AggregateFn {
    ident: syn::Ident,
    // Optional `#[sql_name("...")]` override for the generated SQL name.
    sql_name: Option<syn::LitStr>,
    parens: syn::token::Paren,
    args: Punctuated<AggregateArg, Comma>,
    ret: syn::ReturnType,
    body: syn::Block,
    // A `FunctionCallInfo` argument, if present; kept separate from `args`
    // because it is handled specially by the expander.
    fcinfo: Option<AggregateArg>,
}

/// A single function argument: the Rust pattern/type plus the optional
/// `#[sql_type("...")]` annotation naming its SQL type.
#[derive(Clone)]
struct AggregateArg {
    rust: syn::PatType,
    sql: Option<syn::LitStr>,
}

// Bail out of the surrounding function with a `syn::Error` at `$span`.
macro_rules! error {
    ($span: expr, $fmt: literal, $($arg:expr),* $(,)?) => {
        return Err(syn::Error::new($span, format!($fmt, $($arg),*)))
    };
    ($span: expr, $msg: literal) => {
        return Err(syn::Error::new($span, $msg))
    };
}

// Error out if `$val` is already `Some`, i.e. the item named by `$name`
// was defined twice.
macro_rules! check_duplicate {
    ($val: expr, $span:expr, $name: expr) => {
        if $val.is_some() {
            // BUGFIX: `$name` was previously dropped, so the user saw the
            // literal message "duplicate {}"; interpolate it so the message
            // names the duplicated item.
            error!($span, "duplicate {}", $name)
        }
    };
}

impl Parse for Aggregate {
    fn parse(input: ParseStream) -> syn::Result<Self> {
        // Parse `impl [schema::]name` — an optional schema qualifier
        // followed by the aggregate's name.
        let _: Token![impl] = input.parse()?;
        let first_path_segment = input.parse()?;
        let (schema, name): (_, syn::Ident) = if input.peek(Token![::]) {
            let _: Token![::] = input.parse()?;
            (Some(first_path_segment), input.parse()?)
        } else {
            (None, first_path_segment)
        };

        let body;
        let _brace_token = syn::braced!(body in input);

        // First pass: collect the raw items from the block body.
        let mut state_ty = None;
        let mut parallel_safe = None;
        let mut fns: Vec<AggregateFn> = vec![];
        while !body.is_empty() {
            use AggregateItem::*;
            let item = body.parse()?;
            match item {
                State(ty) => {
                    if ty.ident != "State" {
                        error!(
                            ty.ident.span(),
                            "unexpected `type {}`, expected `State`",
                            ty.ident
                        )
                    }
                    if state_ty.is_some() {
                        error!(ty.ident.span(), "duplicate `type State`")
                    }
                    state_ty = Some(ty);
                }
                ParallelSafe(safe) => parallel_safe = Some(safe.value),
                Fn(f) => {
                    fns.push(f);
                }
            }
        }

        // Second pass: sort the functions into their roles, validating each
        // one's arity and SQL-type annotations.
        let mut transition_fn = None;
        let mut final_fn = None;
        let mut serialize_fn = None;
        let mut deserialize_fn = None;
        let mut combine_fn = None;
        for f in fns {
            if f.ident == "transition" {
                check_duplicate!(transition_fn, f.ident.span(), "`fn transition`");
                if f.args.is_empty() {
                    error!(
                        f.parens.span,
                        "transition function must have at least one argument"
                    )
                }
                // Every argument after the state needs an explicit SQL type.
                for arg in f.args.iter().skip(1) {
                    if arg.sql.is_none() {
                        error!(arg.rust.span(), "missing SQL type")
                    }
                }
                transition_fn = Some(f);
            } else if f.ident == "finally" {
                check_duplicate!(final_fn, f.ident.span(), "`fn finally`");
                if f.args.len() != 1 {
                    error!(
                        f.parens.span,
                        "final function must have exactly one argument of type `Option<&mut State>`"
                    )
                }
                if f.args[0].sql.is_some() {
                    error!(
                        f.args[0].sql.span(),
                        "should not have SQL type, will be inferred"
                    )
                }
                final_fn = Some(f);
            } else if f.ident == "serialize" {
                check_duplicate!(serialize_fn, f.ident.span(), "`fn serialize`");
                if f.args.len() != 1 {
                    error!(
                        f.parens.span,
                        "serialize function must have exactly one argument of type `&State`"
                    )
                }
                if f.args[0].sql.is_some() {
                    error!(
                        f.args[0].sql.span(),
                        "should not have SQL type, will be inferred"
                    )
                }
                serialize_fn = Some(f);
            } else if f.ident == "deserialize" {
                check_duplicate!(deserialize_fn, f.ident.span(), "`fn deserialize`");
                if f.args.len() != 1 {
                    error!(
                        f.parens.span,
                        "deserialize function must have exactly one argument of type `bytea`"
                    )
                }
                if f.args[0].sql.is_some() {
                    error!(
                        f.args[0].sql.span(),
                        "should not have SQL type, will be inferred"
                    )
                }
                deserialize_fn = Some(f);
            } else if f.ident == "combine" {
                check_duplicate!(combine_fn, f.ident.span(), "`fn combine`");
                if f.args.len() != 2 {
                    // BUGFIX: this message was a copy-paste of the deserialize
                    // one ("deserialize function must have at one argument").
                    error!(
                        f.parens.span,
                        "combine function must have two arguments of type `Option<&State>`"
                    )
                }
                for arg in &f.args {
                    if arg.sql.is_some() {
                        error!(arg.sql.span(), "should not have SQL type, will be inferred")
                    }
                }
                combine_fn = Some(f)
            } else {
                error!(
                    f.ident.span(),
                    "unexpected `fn {}`, expected one of `transition`, `finally`, `serialize`, `deserialize`, or `combine`",
                    f.ident
                )
            }
        }

        // The state type, transition function, and final function are
        // mandatory; everything else is optional.
        let state_ty = match state_ty {
            Some(state_ty) => state_ty,
            None => error!(name.span(), "missing `type State = ...;`"),
        };
        let transition_fn = match transition_fn {
            Some(transition_fn) => transition_fn,
            None => error!(name.span(), "missing `fn transition`"),
        };
        let final_fn = match final_fn {
            Some(final_fn) => final_fn,
            // BUGFIX: the message said "missing `fn final`", but the item is
            // declared as `fn finally` in this DSL.
            None => error!(name.span(), "missing `fn finally`"),
        };
        Ok(Aggregate {
            schema,
            name,
            state_ty,
            parallel_safe,
            transition_fn,
            final_fn,
            serialize_fn,
            deserialize_fn,
            combine_fn,
        })
    }
}

impl Parse for AggregateItem {
    fn parse(input: ParseStream) -> syn::Result<Self> {
        // Fork past any outer attributes so the lookahead sees the item
        // keyword itself (`fn`, `type`, or `const`).
        let ahead = input.fork();
        let _ = ahead.call(syn::Attribute::parse_outer)?;
        let lookahead = ahead.lookahead1();
        if lookahead.peek(Token![fn]) {
            input.parse().map(AggregateItem::Fn)
        } else if lookahead.peek(Token![type]) {
            input.parse().map(AggregateItem::State)
        } else if lookahead.peek(Token![const]) {
            input.parse().map(AggregateItem::ParallelSafe)
        } else {
            Err(lookahead.error())
        }
    }
}

impl Parse for AggregateTy {
    // Parses `type <ident> = <ty>;`.
    fn parse(input: ParseStream) -> syn::Result<Self> {
        let _: Token![type] = input.parse()?;
        let ident = input.parse()?;
        let _: Token![=] = input.parse()?;
        let ty = Box::new(input.parse()?);
        let _: Token![;] = input.parse()?;
        Ok(Self { ident, ty })
    }
}

impl Parse for AggregateParallelSafe {
    // Parses `const PARALLEL_SAFE: bool = <lit>;` (name and type are
    // validated as they are read).
    fn parse(input: ParseStream) -> syn::Result<Self> {
        let _: Token![const] = input.parse()?;
        let name: syn::Ident =
// Completes `let name: syn::Ident =` begun on the previous line.
input.parse()?;
// The only const this DSL accepts is `PARALLEL_SAFE: bool`.
if name != "PARALLEL_SAFE" {
    error!(
        name.span(),
        "unexpected const `{}` expected `PARALLEL_SAFE`",
        name
    )
}
let _: Token![:] = input.parse()?;
let ty: syn::Ident = input.parse()?;
if ty != "bool" {
    error!(ty.span(), "unexpected type `{}` expected `bool`", ty)
}
let _: Token![=] = input.parse()?;
let value = input.parse()?;
let _: Token![;] = input.parse()?;
Ok(Self { value })
}
}

// Returns true when `arg`'s type path mentions `FunctionCallInfo`; such an
// argument is routed to the generated outer function rather than to the
// user-visible SQL argument list.
fn is_fcinfo(arg: &AggregateArg) -> bool {
    if let syn::Type::Path(p) = &*arg.rust.ty {
        for id in p.path.segments.iter() {
            if id.ident == "FunctionCallInfo" {
                return true;
            }
        }
    }
    false
}

impl Parse for AggregateFn {
    // Parses one `fn` item, separating out a `FunctionCallInfo` argument
    // (if any) and capturing an optional `#[sql_name(...)]` attribute.
    fn parse(input: ParseStream) -> syn::Result {
        let mut attributes = input.call(syn::Attribute::parse_outer)?;
        let _: Token![fn] = input.parse()?;
        let ident = input.parse()?;
        let contents;
        let parens = syn::parenthesized!(contents in input);
        let mut args = Punctuated::new();
        let mut fcinfo = None;
        while !contents.is_empty() {
            let arg: AggregateArg = contents.parse()?;
            if is_fcinfo(&arg) {
                // Keep the fcinfo argument out of `args`: consume its
                // trailing comma (if any) and continue with the next arg.
                fcinfo = Some(arg);
                if contents.is_empty() {
                    break;
                }
                let _comma: Token![,] = contents.parse()?;
                continue;
            }
            args.push(arg);
            if contents.is_empty() {
                break;
            }
            let comma: Token![,] = contents.parse()?;
            args.push_punct(comma);
        }
        let ret = input.parse()?;
        let body = input.parse()?;
        // `#[sql_name("...")]` overrides the generated SQL function name.
        let expected_path = parse_quote!(sql_name);
        let sql_name = match take_attr(&mut attributes, &expected_path) {
            None => None,
            Some(attribute) => attribute.parse_args()?,
        };
        // Any attribute other than `sql_name` is rejected.
        if !attributes.is_empty() {
            error!(attributes[0].span(), "unexpected attribute")
        }
        Ok(Self {
            ident,
            sql_name,
            parens,
            args,
            ret,
            body,
            fcinfo,
        })
    }
}

impl Parse for AggregateArg {
    // Parses a typed argument; `self` is rejected, and a `#[sql_type(...)]`
    // attribute (if present) is stripped from the Rust argument and stored
    // alongside it.
    fn parse(input: ParseStream) -> syn::Result {
        let arg: syn::FnArg = input.parse()?;
        let mut rust = match arg {
            syn::FnArg::Typed(pat) => pat,
            _ => error!(arg.span(), "`self` is not a valid aggregate argument"),
        };
        let sql = {
            let expected_path = parse_quote!(sql_type);
            let attribute = take_attr(&mut rust.attrs, &expected_path);
            match attribute {
                None => None,
Some(attribute) => attribute.parse_args()?, } }; Ok(Self { rust, sql }) } } fn take_attr(attrs: &mut Vec, path: &syn::Path) -> Option { let idx = attrs.iter().enumerate().find(|(_, a)| &a.path == path); match idx { None => None, Some((idx, _)) => { let attribute = attrs.remove(idx); Some(attribute) } } } // // Expander // fn expand(agg: Aggregate) -> TokenStream2 { use std::fmt::Write; let Aggregate { schema, name, state_ty, parallel_safe, transition_fn, final_fn, serialize_fn, deserialize_fn, combine_fn, } = agg; let state_ty = state_ty.ty; let transition_fns = transition_fn.transition_fn_tokens(&schema, &name); let final_fns = final_fn.final_fn_tokens(&schema, &name); let mut extension_sql_reqs = vec![ transition_fn.outer_ident(&name), final_fn.outer_ident(&name), ]; let schema_qualifier = match &schema { Some(schema) => format!("{schema}."), None => String::new(), }; let mut create = format!("\nCREATE AGGREGATE {schema_qualifier}{name} ("); for (i, (name, arg)) in transition_fn.sql_args().enumerate() { if i != 0 { let _ = write!(&mut create, ", "); } if let Some(name) = name { let _ = write!(&mut create, "{name} "); } let _ = write!(&mut create, "{arg}"); } let transition_fn_ident = transition_fn.outer_ident(&name); let final_fn_ident = final_fn.outer_ident(&name); let _ = write!( &mut create, ") (\n \ stype = internal,\n \ sfunc = {schema_qualifier}{transition_fn_ident},\n \ finalfunc = {schema_qualifier}{final_fn_ident}" ); let parallel_safe = parallel_safe.map(|p| { let value = p.value(); let _ = write!( &mut create, ",\n parallel = {}", if value { "safe" } else { "unsafe" } ); let serialize_fn_check = value .then(|| { serialize_fn.as_ref().is_none().then(|| { quote_spanned!(p.span()=> compile_error!("parallel safety requires a `fn serialize()` also"); ) }) }) .flatten(); let deserialize_fn_check = value .then(|| { deserialize_fn.as_ref().is_none().then(|| { quote_spanned!(p.span()=> compile_error!("parallel safety requires a `fn deserialize()` also"); ) }) 
}) .flatten(); let combine_fn_check = value .then(|| { combine_fn.as_ref().is_none().then(|| { quote_spanned!(p.span()=> compile_error!("parallel safety requires a `fn combine()` also"); ) }) }) .flatten(); quote_spanned!(p.span()=> #serialize_fn_check #deserialize_fn_check #combine_fn_check #[allow(dead_code)] pub const PARALLEL_SAFE: bool = #value; ) }); let mut add_function = |f: AggregateFn, field: &str, make_tokens: fn(&AggregateFn, &Option, &syn::Ident) -> TokenStream2| { extension_sql_reqs.push(f.outer_ident(&name)); let _ = write!( &mut create, ",\n {} = {}{}", field, schema_qualifier, f.outer_ident(&name) ); make_tokens(&f, &schema, &name) }; let serialize_fns_check = serialize_fn.as_ref().xor(deserialize_fn.as_ref()).map(|_| { let s = serialize_fn.as_ref().map(|f| { quote_spanned!(f.ident.span()=> compile_error!("`fn deserialize()` is also required"); ) }); let d = deserialize_fn.as_ref().map(|f| { quote_spanned!(f.ident.span()=> compile_error!("`fn serialize()` is also required"); ) }); quote!(#s #d) }); let combine_fns_check1 = serialize_fn.as_ref().xor(combine_fn.as_ref()).map(|_| { let s = serialize_fn.as_ref().map(|f| { quote_spanned!(f.ident.span()=> compile_error!("`fn combine()` is also required"); ) }); let c = combine_fn.as_ref().map(|f| { quote_spanned!(f.ident.span()=> compile_error!("`fn serialize()` is also required"); ) }); quote!(#s #c) }); let combine_fns_check2 = combine_fn.as_ref().xor(deserialize_fn.as_ref()).map(|_| { let s = combine_fn.as_ref().map(|f| { quote_spanned!(f.ident.span()=> compile_error!("`fn deserialize()` is also required"); ) }); let d = deserialize_fn.as_ref().map(|f| { quote_spanned!(f.ident.span()=> compile_error!("`fn combine()` is also required"); ) }); quote!(#s #d) }); let serialize_fns = serialize_fn.map(|f| add_function(f, "serialfunc", AggregateFn::serialize_fn_tokens)); let deserialize_fns = deserialize_fn.map(|f| add_function(f, "deserialfunc", AggregateFn::deserialize_fn_tokens)); let combine_fns = 
combine_fn.map(|f| add_function(f, "combinefunc", AggregateFn::combine_fn_tokens)); let _ = write!(&mut create, "\n);\n"); let extension_sql_name = format!("{name}_extension_sql"); quote! { pub mod #name { use super::*; pub type State = #state_ty; #serialize_fns_check #combine_fns_check1 #combine_fns_check2 #parallel_safe #transition_fns #final_fns #serialize_fns #deserialize_fns #combine_fns pgrx::extension_sql!( #create, name=#extension_sql_name, requires=[#(#extension_sql_reqs),*], ); } } } impl AggregateFn { fn transition_fn_tokens( &self, schema: &Option, aggregate_name: &syn::Ident, ) -> TokenStream2 { let outer_ident = self.outer_ident(aggregate_name); let Self { ident, args, body, ret, fcinfo, .. } = self; let schema = schema.as_ref().map(|s| { let s = format!("{s}"); quote!(, schema = #s) }); let input_ty = &*args[0].rust.ty; let state_type_check = state_type_check_tokens(input_ty, Some(())); let fcinfo_arg = if let Some(fcinfo) = fcinfo { fcinfo.clone() } else { syn::parse_str::("__fcinfo: pg_sys::FunctionCallInfo").unwrap() }; let mut expanded_args = args.clone(); if let Some(fcinfo) = fcinfo { let trailing = expanded_args.trailing_punct(); if !trailing { expanded_args.push_punct(Comma::default()); } expanded_args.push_value(fcinfo.clone()); if trailing { expanded_args.push_punct(Comma::default()); } } let fcinfo_ident = arg_ident(&fcinfo_arg); let arg_signatures = args .iter() .chain(std::iter::once(&fcinfo_arg)) .skip(1) .map(|arg| &arg.rust); let arg_vals: Punctuated = expanded_args.iter().skip(1).map(arg_ident).collect(); let inner_arg_signatures = expanded_args.iter().map(|arg| &arg.rust); let return_type_check = state_type_check_tokens(&ret_type(ret), Some(())); // use different variables for these to ensure the type-check is called let input_var = syn::Ident::new("__inner", input_ty.span()); let input_state_var = syn::Ident::new("state", input_ty.span()); let input_type_check = quote_spanned!(input_ty.span()=> let inner: Option = match &mut 
#input_var { None => None, Some(inner) => Option::take(&mut **inner), }; let #input_state_var: #input_ty = inner; ); // use different variables for these to ensure the type-check is called let result_var = syn::Ident::new("result", ret_type(ret).span()); let state_var = syn::Ident::new("state", ret_type(ret).span()); let result_type_check = quote_spanned!(state_var.span()=> let #state_var: Option = #result_var; ); quote! { #state_type_check #return_type_check #[pgrx::pg_extern(immutable, parallel_safe #schema)] pub fn #outer_ident( #input_var: pgrx::Internal, #(#arg_signatures,)* ) -> Option { use crate::palloc::{Inner, InternalAsValue, ToInternal}; unsafe { let mut #input_var: Option>> = #input_var.to_inner(); #input_type_check crate::aggregate_utils::in_aggregate_context(#fcinfo_ident, || { let #result_var = #ident(#input_state_var, #arg_vals); #result_type_check #input_var = match (#input_var, state) { (None, None) => None, (None, state @ Some(..)) => { Some(state.into()) }, (Some(mut inner), state) => { *inner = state; Some(inner) }, }; #input_var.internal() }) } } pub fn #ident(#(#inner_arg_signatures),*) #ret #body } } fn final_fn_tokens( &self, schema: &Option, aggregate_name: &syn::Ident, ) -> TokenStream2 { let outer_ident = self.outer_ident(aggregate_name); let Self { ident, args, ret, body, .. } = self; let schema = schema.as_ref().map(|s| { let s = format!("{s}"); quote!(, schema = #s) }); let input_ty = &*args[0].rust.ty; let state_type_check = type_check_tokens(input_ty, parse_quote!(Option<&mut State>)); let arg_vals: Punctuated = args.iter().skip(1).map(arg_ident).collect(); let inner_arg_signatures = args.iter().map(|arg| &arg.rust); // use different variables for these to ensure the type-check is called let input_var = syn::Ident::new("input", input_ty.span()); let state_var = syn::Ident::new("state", input_ty.span()); let input_type_check = quote_spanned!(input_ty.span()=> let #state_var: #input_ty = #input_var; ); quote! 
{ #state_type_check #[pgrx::pg_extern(immutable, parallel_safe #schema)] pub fn #outer_ident( __internal: pgrx::Internal, __fcinfo: pg_sys::FunctionCallInfo ) #ret { use crate::palloc::InternalAsValue; unsafe { let mut #input_var: Option>> = __internal.to_inner(); let #input_var: Option<&mut State> = #input_var.as_deref_mut() .map(|i| i.as_mut()).flatten(); #input_type_check #ident(#state_var, #arg_vals) } } pub fn #ident(#(#inner_arg_signatures,)*) #ret #body } } fn serialize_fn_tokens( &self, schema: &Option, aggregate_name: &syn::Ident, ) -> TokenStream2 { let outer_ident = self.outer_ident(aggregate_name); let Self { ident, args, ret, body, .. } = self; let schema = schema.as_ref().map(|s| { let s = format!("{s}"); quote!(, schema = #s) }); let input_ty = &*args[0].rust.ty; let state_type_check = refstate_type_check_tokens(input_ty, None); let return_type_check = bytea_type_check_tokens(&ret_type(ret)); let inner_arg_signatures = args.iter().map(|arg| &arg.rust); // use different variables for these to ensure the type-check is called let input_var = syn::Ident::new("input", input_ty.span()); let state_var = syn::Ident::new("state", input_ty.span()); let input_type_check = quote_spanned!(input_ty.span()=> let #state_var: #input_ty = #input_var; ); quote! { #state_type_check #return_type_check #[pgrx::pg_extern(strict, immutable, parallel_safe #schema)] pub fn #outer_ident( __internal: pgrx::Internal, ) -> bytea { use crate::palloc::{Inner, InternalAsValue}; let #input_var: Option>> = unsafe { __internal.to_inner() }; let mut #input_var: Inner> = #input_var.unwrap(); let #input_var: &mut State = #input_var.as_mut().unwrap(); #input_type_check #ident(#state_var) } #[allow(clippy::ptr_arg)] pub fn #ident(#(#inner_arg_signatures,)*) -> bytea #body } } fn deserialize_fn_tokens( &self, schema: &Option, aggregate_name: &syn::Ident, ) -> TokenStream2 { let outer_ident = self.outer_ident(aggregate_name); let Self { ident, args, ret, body, .. 
// NOTE(review): tail of crates/aggregate_builder/src/lib.rs, collapsed onto a few
// physical lines by text extraction. Generic type parameters appear to have been
// stripped (e.g. `Inner>`, bare `Option`, `impl Iterator, String)>`) — presumably
// `Inner<Option<State>>`, `Option<Internal>`, `impl Iterator<Item = (Option<&syn::Ident>, String)>`.
// Code below is kept byte-for-byte; restore formatting and generics from VCS before editing.
} = self; let schema = schema.as_ref().map(|s| { let s = format!("{s}"); quote!(, schema = #s) }); let state_name = arg_ident(&args[0]); let state_type_check = bytea_type_check_tokens(&args[0].rust.ty); let return_type_check = state_type_check_tokens(&ret_type(ret), None); // use different variables for these to ensure the type-check is called let result_var = syn::Ident::new("result", ret_type(ret).span()); let state_var = syn::Ident::new("state", ret_type(ret).span()); let result_type_check = quote_spanned!(state_var.span()=> let #state_var: State = #result_var; ); // int8_avg_deserialize allocates in CurrentMemoryContext, so we do the same // https://github.com/postgres/postgres/blob/f920f7e799c587228227ec94356c760e3f3d5f2b/src/backend/utils/adt/numeric.c#L5728-L5770 quote! { #state_type_check #return_type_check #[pgrx::pg_extern(strict, immutable, parallel_safe #schema)] pub fn #outer_ident( bytes: crate::raw::bytea, _internal: Internal ) -> Option { use crate::palloc::ToInternal; let #result_var = #ident(bytes); #result_type_check let state: Inner> = Some(state).into(); unsafe { Some(state).internal() } } pub fn #ident(#state_name: crate::raw::bytea) #ret #body } } fn combine_fn_tokens( &self, schema: &Option, aggregate_name: &syn::Ident, ) -> TokenStream2 { let outer_ident = self.outer_ident(aggregate_name); let Self { ident, args, ret, body, ..
// `combine_fn_tokens` (begun on the line above) generates the pgrx `combine`
// function for an aggregate: the generated outer fn unwraps both Internal
// states inside `in_aggregate_context`, calls the user-written combine body
// with `Option<&State>` views of both sides, re-wraps the returned state, and
// in test builds bumps the per-aggregate combine counters generated by
// `make_mod_counters` below.
} = self; let schema = schema.as_ref().map(|s| { let s = format!("{s}"); quote!(, schema = #s) }); let a_name = arg_ident(&args[0]); let b_name = arg_ident(&args[1]); let state_type_check_a = refstate_type_check_tokens(&args[0].rust.ty, Some(())); let state_type_check_b = refstate_type_check_tokens(&args[1].rust.ty, Some(())); let return_type_check = state_type_check_tokens(&ret_type(ret), Some(())); let inner_arg_signatures = args.iter().map(|arg| &arg.rust); // use different variables for these to ensure the type-check is called let result_var = syn::Ident::new("result", ret_type(ret).span()); let state_var = syn::Ident::new("state", ret_type(ret).span()); let result_type_check = quote_spanned!(state_var.span()=> let #state_var: Option = #result_var; ); let mod_counters = make_mod_counters(); quote! { #state_type_check_a #state_type_check_b #return_type_check #mod_counters #[pgrx::pg_extern(immutable, parallel_safe #schema)] pub fn #outer_ident( #a_name: Internal, #b_name: Internal, __fcinfo: pg_sys::FunctionCallInfo ) -> Option { use crate::palloc::{Inner, InternalAsValue, ToInternal}; unsafe { crate::aggregate_utils::in_aggregate_context(__fcinfo, || { let a: Option> = #a_name.to_inner(); let b: Option> = #b_name.to_inner(); #[cfg(any(test, feature = "pg_test"))] #aggregate_name::counters::increment_combine(&a, &b); let #result_var = #ident( a.as_deref(), b.as_deref(), ); #result_type_check let state = match #state_var { None => None, state @ Some(..)
// Above: end of the generated combine fn. Below: `outer_ident` derives the
// SQL-visible function name (`<aggregate>_<fn>_fn_outer` unless overridden via
// `sql_name`); `sql_args` yields (ident, SQL type string) pairs for every
// argument except the leading state argument; `arg_ident` clones an argument's
// pattern; `make_mod_counters` emits a test-only `counters` module of atomic
// counters recording which (Some/None) combine argument patterns postgres uses.
=> { let state: Inner> = state.into(); Some(state) }, }; state.internal() }) } } #[allow(clippy::ptr_arg)] pub fn #ident(#(#inner_arg_signatures,)*) #ret #body } } fn outer_ident(&self, aggregate_name: &syn::Ident) -> syn::Ident { let name = match &self.sql_name { Some(name) => name.value(), None => format!("{}_{}_fn_outer", aggregate_name, self.ident), }; syn::Ident::new(&name, Span::call_site()) } fn sql_args(&self) -> impl Iterator, String)> { self.args.iter().skip(1).map(|arg| { let ident = match &*arg.rust.pat { syn::Pat::Ident(id) => Some(&id.ident), _ => None, }; (ident, arg.sql.as_ref().expect("missing sql arg").value()) }) } } fn arg_ident(arg: &AggregateArg) -> syn::Pat { syn::Pat::clone(&*arg.rust.pat) } fn make_mod_counters() -> TokenStream2 { quote! { #[cfg(any(test, feature = "pg_test"))] pub mod counters { use ::std::sync::atomic::{AtomicUsize, Ordering::Relaxed}; use crate::palloc::Inner; pub static COMBINE_NONE: AtomicUsize = AtomicUsize::new(0); pub static COMBINE_A: AtomicUsize = AtomicUsize::new(0); pub static COMBINE_B: AtomicUsize = AtomicUsize::new(0); pub static COMBINE_BOTH: AtomicUsize = AtomicUsize::new(0); // Works as long as only one pg_test is run at a time. If we have two // running in the same process, need a mutex to ensure only one test is // using the counters at a time. Otherwise, a test may see non-zero // counters because of another test's work rather than its own. pub fn reset() { COMBINE_NONE.store(0, Relaxed); COMBINE_A.store(0, Relaxed); COMBINE_B.store(0, Relaxed); COMBINE_BOTH.store(0, Relaxed); } pub fn increment_combine(a: &Option>, b: &Option>) { match (a, b) { // TODO Remove COMBINE_NONE? We suspect postgres never calls with (None, None); what would be the point? (None, None) => COMBINE_NONE.fetch_add(1, Relaxed), // TODO Remove COMBIINE_A? We suspect postgres never calls with (Some, None), only (None, Some).
// Remaining helpers: `ret_type` normalizes a `syn::ReturnType` (unit type for a
// defaulted `-> ()`); `state_type_check_tokens` / `refstate_type_check_tokens` /
// `bytea_type_check_tokens` emit `const _: () = { ... };` blocks that fail
// compilation unless the user's declared type equals the expected State /
// Option<State> / bytea shape — the ref-state variant accepts both `&State` and
// `&mut State` through a `Referenced` associated type instead of exact equality.
(Some(_), None) => COMBINE_A.fetch_add(1, Relaxed), (None, Some(_)) => COMBINE_B.fetch_add(1, Relaxed), (Some(_), Some(_)) => COMBINE_BOTH.fetch_add(1, Relaxed), }; } } } } fn ret_type(ret: &syn::ReturnType) -> Cow<'_, syn::Type> { match ret { syn::ReturnType::Default => Cow::Owned(parse_quote!(())), syn::ReturnType::Type(_, ty) => Cow::Borrowed(ty), } } fn state_type_check_tokens(ty: &syn::Type, optional: Option<()>) -> TokenStream2 { match optional { Some(..) => type_check_tokens(ty, parse_quote!(Option)), None => type_check_tokens(ty, parse_quote!(State)), } } fn refstate_type_check_tokens(ty: &syn::Type, optional: Option<()>) -> TokenStream2 { match optional { Some(..) => type_check_tokens(ty, parse_quote!(Option<&State>)), None => { // we need to allow both &State and &mut State, so we use a // different equality-checker for this case than the others quote_spanned! {ty.span()=> const _: () = { trait RefType { type Referenced; } impl<'a, T> RefType for &'a T { type Referenced = T; } impl<'a, T> RefType for &'a mut T { type Referenced = T; } fn check>() {} let _checked = check::<#ty>; }; } } } } fn bytea_type_check_tokens(ty: &syn::Type) -> TokenStream2 { type_check_tokens(ty, parse_quote!(bytea)) } fn type_check_tokens(user_ty: &syn::Type, expected_type: syn::Type) -> TokenStream2 { quote_spanned!
// `type_check_tokens` (continued below): generic equality check via a
// `SameType` trait whose associated `This` must unify the expected and
// user-supplied types at compile time.
{user_ty.span()=> const _: () = { trait SameType { type This; } impl SameType for T { type This = Self; } fn check_type>() {} let _checked = check_type::<#expected_type, #user_ty>; }; } } ================================================ FILE: crates/asap/Cargo.toml ================================================ [package] name = "asap" version = "0.1.0" edition = "2021" [dependencies] ================================================ FILE: crates/asap/src/fft.rs ================================================ // based on https://github.com/stanford-futuredata/ASAP/blob/8b39db4bc92590cbe5b44ddace9b7bb1d677248b/ASAP-optimized.js // original copyright notice as follows // // Free FFT and convolution (JavaScript) // // Copyright (c) 2014 Project Nayuki // https://www.nayuki.io/page/free-small-fft-in-multiple-languages // // (MIT License) // Permission is hereby granted, free of charge, to any person obtaining a copy of // this software and associated documentation files (the "Software"), to deal in // the Software without restriction, including without limitation the rights to // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, // subject to the following conditions: // - The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // - The Software is provided "as is", without warranty of any kind, express or // implied, including but not limited to the warranties of merchantability, // fitness for a particular purpose and noninfringement. In no event shall the // authors or copyright holders be liable for any claim, damages or other // liability, whether in an action of contract, tort or otherwise, arising from, // out of or in connection with the Software or the use or other dealings in the // Software.
// TODO JOSH it looks like they have a rust version as well,
// we likely should be using that instead

use std::f64::consts::PI;

/// Computes the discrete Fourier transform (DFT) of the given complex vector
/// (`real`/`imag` halves), storing the result back into the vector. The vector
/// can have any length; this wrapper dispatches to the radix-2 FFT for
/// power-of-two lengths and to Bluestein's algorithm otherwise.
pub fn transform(real: &mut [f64], imag: &mut [f64]) {
    assert_eq!(real.len(), imag.len());
    let n = real.len();
    if n == 0 {
        // nothing to transform
    } else if n & (n - 1) == 0 {
        // length is a power of 2
        transform_radix2(real, imag);
    } else {
        // more complicated algorithm for arbitrary sizes
        transform_bluestein(real, imag);
    }
}

/// Computes the inverse DFT in place. Swapping the real and imaginary halves
/// of a forward transform yields the inverse; no scaling is performed, so this
/// is not a true inverse until the caller divides by the length.
pub fn inverse_transform(real: &mut [f64], imag: &mut [f64]) {
    transform(imag, real);
}

/// In-place Cooley-Tukey decimation-in-time radix-2 FFT.
/// The slice length must be a power of 2.
fn transform_radix2(real: &mut [f64], imag: &mut [f64]) {
    // Reverses the lowest `bits` bits of `x`.
    fn reverse_bits(x: u32, bits: u32) -> u32 {
        let mut x = x;
        let mut y = 0;
        for _ in 0..bits {
            y = (y << 1) | (x & 1);
            x >>= 1;
        }
        y
    }

    let n = real.len();
    if n == 1 {
        // trivial transform
        return;
    }

    // levels = log2(n); the 100 sentinel survives (and trips the debug_assert)
    // only if n is not a power of two.
    let mut levels = 100;
    for i in 0..32 {
        if 1 << i == n {
            levels = i;
        }
    }
    debug_assert!(levels < 32);

    // Precomputed twiddle-factor tables.
    let mut cos_table = Vec::with_capacity(n / 2);
    let mut sin_table = Vec::with_capacity(n / 2);
    for i in 0..n / 2 {
        let angle = 2.0 * PI * i as f64 / n as f64;
        cos_table.push(angle.cos());
        sin_table.push(angle.sin());
    }

    // Bit-reversed addressing permutation.
    for i in 0..n {
        let j = reverse_bits(i as u32, levels) as usize;
        if j > i {
            real.swap(i, j);
            imag.swap(i, j);
        }
    }

    // Butterfly passes, doubling the sub-transform width each time.
    let mut width = 2;
    while width <= n {
        let half = width / 2;
        let stride = n / width;
        for start in (0..n).step_by(width) {
            let mut j = start;
            let mut k = 0;
            while j < start + half {
                let tpre = real[j + half] * cos_table[k] + imag[j + half] * sin_table[k];
                let tpim = -real[j + half] * sin_table[k] + imag[j + half] * cos_table[k];
                real[j + half] = real[j] - tpre;
                imag[j + half] = imag[j] - tpim;
                real[j] += tpre;
                imag[j] += tpim;
                j += 1;
                k += stride;
            }
        }
        width *= 2;
    }
}

/// In-place DFT for arbitrary lengths using Bluestein's chirp z-transform.
/// Relies on `convolve_complex`, which in turn uses the radix-2 FFT.
fn transform_bluestein(real: &mut [f64], imag: &mut [f64]) {
    let n = real.len();

    // Smallest power-of-2 convolution length m with m >= 2n + 1.
    let mut m = 1;
    while m < n * 2 + 1 {
        m *= 2;
    }

    // Trigonometric (chirp) tables; j = i*i mod 2n keeps the angle argument
    // small, which is more accurate than using i*i directly.
    let mut cos_table = Vec::with_capacity(n);
    let mut sin_table = Vec::with_capacity(n);
    for i in 0..n {
        let j = (i * i % (n * 2)) as f64;
        cos_table.push((PI * j / n as f64).cos());
        sin_table.push((PI * j / n as f64).sin());
    }

    // Pre-multiply the input by the conjugate chirp; the tail of the m-length
    // buffers stays zero from the vec! initialization.
    let mut areal = vec![0.0; m];
    let mut aimag = vec![0.0; m];
    for i in 0..n {
        areal[i] = real[i] * cos_table[i] + imag[i] * sin_table[i];
        aimag[i] = -real[i] * sin_table[i] + imag[i] * cos_table[i];
    }

    // Chirp filter, symmetric around index 0 (mod m); middle stays zero.
    let mut breal = vec![0.0; m];
    let mut bimag = vec![0.0; m];
    breal[0] = cos_table[0];
    bimag[0] = sin_table[0];
    for i in 1..n {
        breal[i] = cos_table[i];
        breal[m - i] = cos_table[i];
        bimag[i] = sin_table[i];
        bimag[m - i] = sin_table[i];
    }

    // Convolution.
    let mut creal = vec![0.0; m];
    let mut cimag = vec![0.0; m];
    convolve_complex(
        &mut areal,
        &mut aimag,
        &mut breal,
        &mut bimag,
        &mut creal,
        &mut cimag,
    );

    // Post-multiply by the conjugate chirp.
    for i in 0..n {
        real[i] = creal[i] * cos_table[i] + cimag[i] * sin_table[i];
        imag[i] = -creal[i] * sin_table[i] + cimag[i] * cos_table[i];
    }
}

// /*
//  * Computes the circular convolution of the given real vectors. Each vector's length must be the same.
//  */
// function convolveReal(x, y, out) {
//     if (x.length != y.length || x.length != out.length)
//         throw "Mismatched lengths";
//     var zeros = new Array(x.length);
//     for (var i = 0; i < zeros.length; i++)
//         zeros[i] = 0;
//     convolve_complex(x, zeros, y, zeros.slice(), out, zeros.slice());
// }

/// Computes the circular convolution of the given complex vectors into
/// `outreal`/`outimag`. All slices must have the same length; the `x` and `y`
/// inputs are used as scratch space and are overwritten.
fn convolve_complex(
    xreal: &mut [f64],
    ximag: &mut [f64],
    yreal: &mut [f64],
    yimag: &mut [f64],
    outreal: &mut [f64],
    outimag: &mut [f64],
) {
    let n = xreal.len();
    transform(xreal, ximag);
    transform(yreal, yimag);
    for i in 0..n {
        let tmp = xreal[i] * yreal[i] - ximag[i] * yimag[i];
        ximag[i] = ximag[i] * yreal[i] + xreal[i] * yimag[i];
        xreal[i] = tmp;
    }
    inverse_transform(xreal, ximag);
    for i in 0..n {
        // Scaling (because this FFT implementation omits it).
        outreal[i] = xreal[i] / n as f64;
        outimag[i] = ximag[i] / n as f64;
    }
}

// ================================================
// FILE: crates/asap/src/lib.rs
// ================================================
// based on https://github.com/stanford-futuredata/ASAP/blob/8b39db4bc92590cbe5b44ddace9b7bb1d677248b/ASAP-optimized.js
// original copyright notice as follows
//
// Free FFT and convolution (JavaScript)
//
// Copyright (c) 2014 Project Nayuki
// https://www.nayuki.io/page/free-small-fft-in-multiple-languages
//
// (MIT License)
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
// - The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
// - The Software is provided "as is", without warranty of any kind, express or
// implied, including but not limited to the warranties of merchantability,
// fitness for a particular purpose and noninfringement.
// In no event shall the
// authors or copyright holders be liable for any claim, damages or other
// liability, whether in an action of contract, tort or otherwise, arising from,
// out of or in connection with the Software or the use or other dealings in the
// Software.

mod fft;

// NOTE(review): generic parameters in this file were stripped by text
// extraction (e.g. `-> Vec {`, `sum::()`); they are restored below from usage.

/// Smooth out the data to promote human readability; `resolution` is an upper
/// bound on the number of points returned.
pub fn asap_smooth(data: &[f64], resolution: u32) -> Vec<f64> {
    use std::borrow::Cow;
    // Pre-aggregate with a moving average when there are far more points than
    // the requested resolution.
    let data = if data.len() > 2 * resolution as usize {
        let period = (data.len() as f64 / resolution as f64) as u32;
        Cow::Owned(sma(data, period, period))
    } else {
        Cow::Borrowed(data)
    };
    let mut acf = Acf::new(&data, (data.len() as f64 / 10.0).round() as u32);
    let peaks = acf.find_peaks();
    let mut metrics = Metrics::new(&data);
    let original_kurt = metrics.kurtosis();
    let mut min_obj = metrics.roughness();
    let mut window_size = 1_u32;
    let mut lb = 1;
    let mut largest_feasible = -1_i32;
    let mut tail = data.len() as u32 / 10;
    // Try candidate windows (autocorrelation peaks) from largest to smallest.
    for i in (0..peaks.len()).rev() {
        let w = peaks[i];
        if w < lb || w == 1 {
            break;
        } else if (1.0 - acf.correlations[w as usize]).sqrt() * window_size as f64
            > (1.0 - acf.correlations[window_size as usize]).sqrt() * w as f64
        {
            continue;
        }
        let smoothed = sma(&data, w, 1);
        metrics = Metrics::new(&smoothed);
        // Only accept windows that preserve kurtosis (i.e. keep outliers visible).
        if metrics.kurtosis() >= original_kurt {
            let roughness = metrics.roughness();
            if roughness < min_obj {
                min_obj = roughness;
                window_size = w;
            }
            // NOTE(review): the ASAP reference computes
            // w * sqrt((max_acf - 1) / (corr[w] - 1)); here sqrt() is applied to
            // the (non-positive) numerator alone, yielding NaN so `lb` is never
            // updated here. Kept as-is — confirm against upstream before fixing.
            let test_lb =
                w as f64 * (acf.max_acf - 1.0).sqrt() / (acf.correlations[w as usize] - 1.0);
            if test_lb > lb as f64 {
                lb = test_lb.round() as u32;
            }
            if largest_feasible < 0 {
                largest_feasible = i as i32;
            }
        }
    }
    if largest_feasible > 0 {
        if largest_feasible < (peaks.len() - 2) as i32 {
            tail = peaks[(largest_feasible + 1) as usize];
        }
        if peaks[largest_feasible as usize] + 1 > lb {
            lb = peaks[largest_feasible as usize] + 1;
        }
    }
    // Refine within [lb, tail] and return the smoothed series.
    window_size = binary_search(lb, tail, &data, min_obj, original_kurt, window_size);
    sma(&data, window_size, 1)
}

/// Binary search over window sizes in `[head, tail]` for the smoothest window
/// that still preserves the original kurtosis; returns the best window found.
fn binary_search(
    head: u32,
    tail: u32,
    data: &[f64],
    min_obj: f64,
    original_kurt: f64,
    window_size: u32,
) -> u32 {
    let mut head = head;
    let mut tail = tail;
    let mut min_obj = min_obj;
    let mut window_size = window_size;
    while head <= tail {
        let w = (head + tail).div_ceil(2);
        let smoothed = sma(data, w, 1);
        let metrics = Metrics::new(&smoothed);
        if metrics.kurtosis() >= original_kurt {
            /* Search second half if feasible */
            let roughness = metrics.roughness();
            if roughness < min_obj {
                window_size = w;
                min_obj = roughness;
            }
            head = w + 1;
        } else {
            /* Search first half */
            tail = w - 1;
        }
    }
    window_size
}

/// Simple moving average with window `range`, advancing `slide` points between
/// outputs (slide == 1 smooths, slide == range downsamples).
fn sma(data: &[f64], range: u32, slide: u32) -> Vec<f64> {
    let mut window_start = 0;
    let mut sum = 0.0;
    let mut count = 0;
    let mut values = Vec::new();
    for (i, val) in data.iter().enumerate() {
        sum += val;
        count += 1;
        if i + 1 - window_start >= range as usize {
            values.push(sum / count as f64);
            // Slide the window start forward, removing the dropped points.
            let old_start = window_start;
            while window_start < data.len() && window_start - old_start < slide as usize {
                sum -= data[window_start];
                count -= 1;
                window_start += 1;
            }
        }
    }
    values
}

/// Arithmetic mean of `values` (NaN on empty input).
fn mean(values: &[f64]) -> f64 {
    values.iter().sum::<f64>() / values.len() as f64
}

/// Population standard deviation of `values`.
fn std(values: &[f64]) -> f64 {
    let m = mean(values);
    let std: f64 = values.iter().map(|&x| (x - m).powi(2)).sum();
    (std / values.len() as f64).sqrt()
}

impl<'a> Acf<'a> {
    /// Builds the autocorrelation structure for `values` up to `max_lag`.
    fn new(values: &'a [f64], max_lag: u32) -> Acf<'a> {
        let mut acf = Acf {
            mean: mean(values),
            values,
            correlations: Vec::with_capacity(max_lag as usize),
            max_acf: 0.0,
        };
        acf.calculate();
        acf
    }

    /// Computes autocorrelations via the Wiener-Khinchin theorem (FFT of the
    /// mean-centered series, power spectrum, inverse FFT).
    fn calculate(&mut self) {
        /* Padding to the closest power of 2 */
        let len = (2_u32).pow((self.values.len() as f64).log2() as u32 + 1);
        let mut fftreal = vec![0.0; len as usize];
        let mut fftimg = vec![0.0; len as usize];
        for (i, real) in fftreal.iter_mut().enumerate().take(self.values.len()) {
            *real = self.values[i] - self.mean;
        }
        /* F_R(f) = FFT(X) */
        fft::transform(&mut fftreal, &mut fftimg);
        /* S(f) = F_R(f)F_R*(f) */
        for i in 0..fftreal.len() {
            fftreal[i] = fftreal[i].powi(2) + fftimg[i].powi(2);
            fftimg[i] = 0.0;
        }
        /* R(t) = IFFT(S(f)) */
        fft::inverse_transform(&mut fftreal, &mut fftimg);
        // NOTE(review): `correlations` is created with `Vec::with_capacity`, so
        // its length is 0 here and this loop never executes — this looks like
        // extraction damage or a bug (upstream presumably sizes the vec, e.g.
        // `vec![0.0; max_lag as usize]`). Left as-is; verify against upstream.
        for i in 1..self.correlations.len() {
            self.correlations[i] = fftreal[i] / fftreal[0];
        }
    }

    /// Returns lag indices of local autocorrelation maxima above the
    /// correlation threshold; falls back to all lags >= 2 when fewer than two
    /// peaks are found. Also tracks the largest peak value in `max_acf`.
    fn find_peaks(&mut self) -> Vec<u32> {
        const CORR_THRESH: f64 = 0.2;
        let mut peak_indicies = Vec::new();
        if self.correlations.len() > 1 {
            let mut positive = self.correlations[1] > self.correlations[0];
            let mut max = 1;
            for i in 2..self.correlations.len() {
                if !positive && self.correlations[i] > self.correlations[i - 1] {
                    max = i;
                    positive = !positive;
                } else if positive && self.correlations[i] > self.correlations[max] {
                    max = i;
                } else if positive
                    && self.correlations[i] < self.correlations[i - 1]
                    && max > 1
                    && self.correlations[max] > CORR_THRESH
                {
                    peak_indicies.push(max as u32);
                    if self.correlations[max] > self.max_acf {
                        self.max_acf = self.correlations[max];
                    }
                    positive = !positive;
                }
            }
        }
        /* If there is no autocorrelation peak within the MAX_WINDOW boundary,
        # try windows from the largest to the smallest */
        if peak_indicies.len() <= 1 {
            for i in 2..self.correlations.len() {
                peak_indicies.push(i as u32);
            }
        }
        peak_indicies
    }
}

/// Cached per-series statistics used to score candidate smoothing windows.
struct Metrics<'a> {
    len: u32,
    values: &'a [f64],
    m: f64, // mean of `values`
}

impl Metrics<'_> {
    fn new(values: &[f64]) -> Metrics<'_> {
        Metrics {
            len: values.len() as u32,
            values,
            m: mean(values),
        }
    }

    /// Sample (non-excess) kurtosis: n * m4 / m2^2.
    fn kurtosis(&self) -> f64 {
        let mut u4 = 0.0;
        let mut variance = 0.0;
        for value in self.values {
            u4 += (value - self.m).powi(4);
            variance += (value - self.m).powi(2);
        }
        self.len as f64 * u4 / variance.powi(2)
    }

    /// Roughness = standard deviation of the first differences.
    fn roughness(&self) -> f64 {
        std(&self.diffs())
    }

    /// First differences of the series (panics on an empty series).
    fn diffs(&self) -> Vec<f64> {
        let mut diff = vec![0.0; (self.len - 1) as usize];
        for i in 1..self.len as usize {
            diff[i - 1] = self.values[i] - self.values[i - 1];
        }
        diff
    }
}

/// Autocorrelation function of a series, computed once per smoothing request.
struct Acf<'a> {
    mean: f64,
    values: &'a [f64],
    correlations: Vec<f64>,
    max_acf: f64,
}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn simple_sma_test() {
        let data = vec![0.0, 1.0, 2.0, 3.0, 4.0];
        let test = sma(&data, 3, 1);
assert_eq!(test, vec![1.0, 2.0, 3.0]); } #[test] fn sma_slide_test() { let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]; let test = sma(&data, 3, 2); assert_eq!(test, vec![1.0, 3.0, 5.0]); } #[test] fn sma_slide_unaliged_test() { let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]; let test = sma(&data, 3, 2); assert_eq!(test, vec![1.0, 3.0, 5.0]); } #[test] fn sma_downsample_test() { let data = vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]; let test = sma(&data, 2, 2); assert_eq!(test, vec![0.5, 2.5, 4.5, 6.5]); } #[test] fn test_roughness_and_kurtosis() { let series_a = vec![-1.0, 1.0, -1.0, 1.0, -1.0, 1.0]; // bimodal let x = (3.0 - ((9.0 + 8.0 * 1.82) as f64).sqrt()) / 4.0; // ~ -0.45, calculated for specific mean and std let series_b = vec![-1.0, -0.8, x, -0.2, 0.5, 1.5 - x]; // uneven but monotonically increasing let x = ((1.0 / 2.0) as f64).sqrt(); let series_c = vec![-2.0 * x, -1.0 * x, 0.0, x, 2.0 * x]; // linear assert_eq!(mean(&series_a), 0.0); assert_eq!(std(&series_a), 1.0); assert_eq!(mean(&series_b), 0.0); assert_eq!(std(&series_b), 1.0); assert!(mean(&series_c).abs() < 0.000000000001); // float precision breaks == 0 here assert_eq!(std(&series_c), 1.0); let test = Metrics::new(&series_a); assert_eq!( test.roughness(), ((3.0 * 1.6_f64.powi(2) + 2.0 * 2.4_f64.powi(2)) / 5.0).sqrt() ); assert_eq!(test.kurtosis(), 1.0); let test = Metrics::new(&series_b); assert_eq!(test.roughness(), 0.4686099077599554); // manually verified assert!((test.kurtosis() - 2.7304).abs() < 0.000000000001); // = 2.7304 let test = Metrics::new(&series_c); assert_eq!(test.roughness(), 0.0); assert!((test.kurtosis() - 1.7).abs() < 0.000000000001); // == 1.7 } #[test] fn test_smoothing() { // Monthly English temperature data from 1723 through 1970 (~3k pts) #[rustfmt::skip] let data = 
vec![1.1,4.4,7.5,8.9,11.7,15.0,15.3,15.6,13.3,11.1,7.5,5.8,5.6,4.2,4.7,7.2,11.4,15.3,15.0,16.2,14.4,8.6,5.3,3.3,4.4,3.3,5.0,8.1,10.8,12.2,13.8,13.3,12.8,9.4,6.9,3.9,1.1,4.2,4.2,8.4,13.4,16.4,16.0,15.6,14.7,10.2,6.1,1.8,4.2,5.0,5.1,9.2,13.6,14.9,16.9,16.9,14.4,10.8,4.7,3.6,3.9,2.4,7.1,8.3,12.5,16.4,16.9,16.0,12.8,9.1,7.2,1.6,1.2,2.3,2.8,7.1,10.3,15.1,16.8,15.7,16.6,10.1,8.1,5.0,4.1,4.7,6.2,8.7,12.4,14.0,15.3,16.3,15.3,10.9,9.2,3.4,1.9,2.2,6.0,6.8,12.1,15.6,16.3,16.7,15.3,12.3,7.8,5.2,2.4,6.4,6.1,8.9,11.4,14.6,16.0,16.6,14.5,10.9,6.3,2.2,6.9,6.0,5.9,10.0,11.2,15.2,18.3,16.1,12.8,9.1,6.5,7.6,4.3,6.4,8.1,9.3,11.1,14.1,16.2,16.2,13.3,8.4,6.2,4.0,4.4,4.0,5.8,8.9,10.9,13.3,14.8,16.2,14.2,10.3,6.3,5.4,6.4,3.1,6.9,8.6,10.6,15.7,16.4,17.8,14.4,10.4,6.9,6.4,6.2,4.2,6.1,8.8,12.5,15.9,17.4,13.8,14.2,8.9,6.1,4.9,4.6,4.6,5.5,9.9,11.4,14.2,16.4,16.0,12.5,10.2,6.3,6.1,4.0,6.8,5.8,6.7,11.6,15.2,16.0,14.7,13.1,9.6,3.7,3.2,2.8,1.6,3.9,6.4,8.6,12.8,15.3,14.7,14.0,5.3,3.3,2.2,1.7,4.4,4.2,7.1,9.3,15.2,15.6,16.7,14.7,11.0,7.8,3.9,1.9,3.6,4.1,6.6,10.6,15.0,15.8,15.8,12.2,9.2,4.4,1.1,3.6,5.4,5.3,5.4,13.3,15.6,14.9,16.9,14.2,8.9,9.3,4.9,1.4,2.9,4.8,6.7,10.8,14.4,16.4,15.4,12.8,9.4,6.9,3.5,3.8,2.3,4.4,7.5,11.4,12.2,16.1,15.0,14.2,10.3,5.8,2.7,2.5,1.4,3.1,6.9,12.8,14.3,15.8,15.9,14.2,7.8,3.3,5.3,3.3,5.8,2.5,8.1,12.2,14.7,16.9,18.3,14.4,9.4,6.9,5.3,2.5,1.8,1.8,6.3,10.4,14.8,15.4,15.8,14.2,9.2,7.1,6.0,5.3,3.6,5.3,6.8,12.3,11.9,17.2,15.6,13.8,10.1,6.7,4.7,4.0,6.7,8.2,7.7,10.7,14.2,17.2,15.0,15.2,9.2,4.0,4.2,4.0,1.5,6.2,7.1,9.3,14.9,15.3,14.6,12.6,8.3,4.3,3.0,3.2,3.1,5.6,6.8,10.3,14.8,15.6,15.7,13.9,10.6,6.5,4.2,2.2,3.6,6.0,7.5,12.1,14.6,15.2,15.7,13.1,10.0,4.6,4.4,3.3,2.8,3.4,6.7,12.2,13.6,14.7,15.7,13.9,10.4,5.7,3.6,2.2,1.2,3.9,10.0,9.4,15.7,15.0,14.6,13.5,8.4,4.7,3.9,4.4,4.6,6.0,6.7,9.1,13.8,16.1,14.7,13.6,9.4,3.9,2.9,0.3,4.0,4.9,8.1,10.7,14.0,18.4,15.2,13.3,8.2,7.1,3.2,2.6,3.8,5.2,7.2,13.8,14.6,14.2,16.4,11.9,8.1,5.7,3.9,5.9,5.8,6.1,8.6,12.1,15.0,18.2,16.3,13.5,10.9,5.1,2.5,1.9,3.8,6.6,9.4,11.7
,15.2,16.9,15.8,15.7,9.2,5.7,6.1,5.4,5.8,6.8,9.4,11.9,14.3,15.8,16.4,14.2,9.4,6.2,4.4,4.7,4.0,3.7,10.0,12.9,16.9,17.8,15.3,13.6,7.9,4.6,3.6,0.8,4.9,5.4,8.9,10.2,14.6,15.3,15.3,13.1,8.3,5.8,6.2,3.7,3.8,3.9,7.2,12.2,13.9,16.1,15.2,12.5,8.9,4.4,2.8,4.8,0.4,5.0,7.5,11.4,13.8,15.7,15.3,13.3,9.2,3.9,1.7,0.7,1.7,4.2,8.1,9.7,13.7,15.7,16.6,13.3,9.3,7.2,3.3,0.1,5.4,4.7,7.3,10.0,12.8,14.4,16.1,14.1,9.2,6.9,3.3,0.8,4.8,4.7,8.1,12.2,13.9,15.6,16.0,11.7,9.2,5.6,4.6,2.5,2.7,5.0,7.8,11.3,13.1,16.4,15.0,12.8,8.2,5.7,4.8,3.7,4.6,2.5,5.4,10.0,13.1,15.3,15.8,13.9,8.9,5.3,3.6,1.0,3.2,3.1,5.5,12.2,14.3,15.7,14.3,12.2,9.2,6.3,5.6,1.2,1.9,4.4,6.4,10.1,16.1,16.9,16.1,13.0,11.7,7.2,4.8,4.0,2.6,6.5,8.3,10.3,14.7,15.9,17.2,12.4,9.9,5.3,3.8,0.6,4.3,6.4,8.6,10.9,14.7,16.1,16.1,12.5,10.3,4.8,3.5,4.6,6.1,6.0,9.8,12.6,16.6,16.7,15.8,14.3,9.3,4.8,4.5,1.6,3.8,6.4,9.4,10.8,14.1,16.3,15.2,12.9,10.2,6.2,4.4,1.9,2.3,6.8,7.2,11.7,13.6,15.3,15.9,14.6,10.2,6.9,2.6,1.9,3.2,4.6,8.2,10.6,15.4,17.3,16.8,12.2,7.4,6.7,6.1,2.9,7.9,7.9,9.4,11.9,14.4,17.9,17.6,15.2,10.9,5.7,3.1,0.9,2.1,7.9,6.3,12.8,14.2,16.8,17.6,15.6,9.1,4.4,3.2,2.1,4.8,6.6,9.2,12.1,16.2,17.4,17.3,14.2,10.6,6.5,5.4,5.2,1.9,4.1,5.2,9.0,14.9,15.6,14.2,13.3,7.6,2.3,2.8,3.4,3.3,3.3,10.1,10.4,14.8,18.8,15.8,12.8,9.8,6.2,2.7,0.6,1.4,2.7,5.7,13.5,13.7,15.2,14.0,14.8,7.8,5.5,0.3,3.4,0.4,1.2,8.4,12.3,16.1,16.1,13.9,13.6,8.7,5.6,2.8,2.7,3.4,2.1,8.1,11.2,16.1,15.0,15.1,11.7,7.5,3.3,2.8,3.6,5.9,6.8,7.4,11.5,13.9,15.8,15.6,12.8,9.8,4.5,3.8,3.9,3.8,3.6,9.4,13.8,15.4,15.8,15.8,13.4,9.8,6.1,0.3,1.5,5.0,2.1,7.4,12.5,14.0,15.4,16.6,13.1,8.6,4.6,6.1,4.3,6.6,6.4,6.1,11.9,14.6,14.9,15.6,12.2,10.3,6.1,4.3,4.3,4.7,6.5,9.6,10.6,14.7,15.3,15.9,13.8,8.9,5.9,1.1,2.3,4.5,5.9,10.0,10.2,13.2,15.3,16.9,11.8,8.8,7.1,4.3,2.8,4.6,4.2,6.2,10.9,13.5,17.6,15.0,11.7,11.3,6.0,5.3,1.8,7.2,7.0,10.2,11.3,15.7,18.1,15.5,12.5,9.6,6.1,3.7,3.1,0.8,3.9,7.7,10.9,13.2,15.2,16.6,16.0,11.7,4.5,6.6,7.3,4.7,4.2,10.2,10.3,13.9,14.7,15.9,14.6,8.1,4.6,0.3,3.5,4.6,4.3,7.4,11.3,13.6,17.3,15.8,12.5,8.2,4.
7,4.8,3.6,4.0,5.1,10.4,12.9,16.9,16.3,16.4,13.6,9.9,4.7,1.5,1.7,2.8,3.4,5.4,9.6,14.1,15.2,14.4,12.9,8.3,5.6,1.3,2.8,2.2,4.0,9.3,12.3,13.9,17.7,16.8,13.9,9.2,5.4,3.3,4.6,4.8,6.7,8.3,12.1,14.8,16.1,17.1,14.2,10.2,4.8,1.5,1.6,3.7,5.6,8.9,10.6,13.7,13.5,17.2,13.8,10.1,5.1,3.6,1.8,3.4,6.3,9.1,10.5,13.7,17.6,16.1,11.4,9.3,5.0,4.4,5.8,2.9,4.7,6.9,13.3,16.1,15.9,15.6,14.2,10.7,6.6,2.1,2.1,4.1,6.2,8.3,10.2,13.2,16.0,16.4,14.6,8.2,4.8,3.6,4.2,4.3,5.1,6.8,12.1,14.9,15.4,16.2,13.4,10.6,7.8,6.8,2.8,3.7,2.9,7.7,11.8,14.2,17.1,16.9,10.5,11.4,2.9,1.8,2.6,2.8,3.2,5.8,13.7,14.8,18.4,16.7,12.7,7.2,6.0,2.2,2.0,5.7,6.0,5.2,13.1,13.7,15.1,14.8,12.7,10.2,4.6,4.1,2.2,3.5,4.9,8.2,9.2,14.6,15.2,14.6,13.9,9.8,5.4,3.6,1.2,4.6,7.1,8.9,12.8,14.1,16.1,14.4,13.7,12.3,7.7,3.1,2.6,5.3,3.5,5.5,10.9,13.0,14.2,14.3,13.2,9.3,4.9,1.7,1.9,5.8,6.8,7.6,11.6,13.6,15.0,14.5,12.5,8.1,4.3,2.8,2.9,1.4,2.9,9.6,9.2,12.2,16.0,14.7,12.8,8.1,4.7,4.3,0.3,6.5,7.3,8.1,12.6,14.3,14.9,15.3,13.4,10.3,3.4,2.3,2.7,2.1,3.9,6.6,9.9,12.8,13.4,13.9,11.8,10.3,3.9,3.1,4.5,6.4,5.5,7.6,8.7,15.1,14.1,13.6,13.2,6.4,9.1,2.5,4.4,2.7,4.5,6.9,11.3,16.4,18.2,15.3,13.3,12.0,9.5,3.6,4.4,4.3,6.8,8.6,11.5,13.4,16.4,17.4,13.4,9.1,4.1,1.4,0.3,3.2,4.7,8.9,11.4,13.6,15.7,14.7,12.3,8.1,5.6,4.7,3.6,2.1,5.7,9.5,9.4,12.3,14.8,16.4,14.9,10.4,8.6,6.4,4.7,6.3,7.8,8.3,12.7,17.1,15.6,15.2,12.4,10.7,8.2,1.6,0.1,3.1,5.0,6.7,12.2,12.3,14.1,14.4,12.5,8.4,7.1,4.8,4.3,4.7,4.6,7.4,10.7,13.4,16.0,15.1,13.7,9.5,7.2,5.1,3.8,3.9,5.0,9.1,11.6,14.1,17.2,16.3,15.1,10.8,5.2,4.6,0.4,6.4,6.3,8.8,11.2,17.3,17.9,17.6,13.6,11.1,4.4,5.8,1.7,0.7,5.9,8.9,11.9,14.2,16.5,14.8,13.7,11.4,6.9,6.9,5.1,5.2,6.6,8.3,12.4,15.4,16.0,15.3,14.3,10.2,7.4,7.4,0.3,4.3,4.3,6.7,12.5,14.9,15.1,14.3,11.3,8.3,4.5,1.4,0.2,2.2,7.7,8.9,12.0,12.7,16.2,13.7,11.9,10.4,6.9,1.9,1.6,4.8,7.2,9.2,11.5,15.4,16.7,16.9,13.7,12.7,5.6,5.8,3.1,3.4,5.8,8.6,10.9,15.2,15.9,15.4,13.6,10.7,5.9,5.1,1.2,5.6,3.9,7.7,15.1,14.6,15.8,14.3,12.1,10.1,6.6,6.9,7.1,5.6,7.1,7.7,13.0,15.4,16.9,16.2,13.8,10.6,6.7,5.6,2.9,5.7,5.8,8.6,1
1.3,15.0,16.4,16.9,13.4,8.9,6.6,3.1,3.7,3.5,5.8,7.2,11.1,15.3,15.4,14.6,11.7,8.6,5.3,4.1,2.7,4.7,2.3,4.7,9.9,15.5,16.9,15.7,12.5,10.5,5.2,5.3,1.5,0.4,4.9,6.1,10.5,14.4,15.6,15.1,12.7,9.8,4.6,4.0,2.8,4.1,4.2,6.4,10.2,14.3,14.9,14.6,12.4,9.3,7.3,3.7,4.1,3.6,3.8,9.7,11.4,14.1,13.8,15.9,11.1,7.5,11.4,1.3,1.1,2.4,7.5,7.8,12.7,12.9,13.8,14.6,13.4,8.7,5.2,4.4,0.6,4.2,6.4,7.8,11.4,15.6,14.5,17.1,13.2,7.2,5.5,7.2,4.0,1.9,5.6,8.5,10.4,12.8,14.8,15.3,14.4,7.9,5.7,7.4,3.8,1.6,4.7,9.8,10.8,14.7,15.4,13.5,13.3,9.2,5.9,0.4,3.2,0.9,2.0,8.6,9.5,14.9,14.3,13.5,11.4,9.5,6.7,4.6,6.3,6.4,6.1,7.8,12.3,18.2,16.5,16.6,14.7,9.5,6.9,0.5,2.2,2.4,5.6,6.6,12.3,13.9,17.5,15.2,11.5,10.7,7.9,4.8,1.3,6.1,5.9,8.2,13.9,14.5,15.6,13.6,12.8,9.7,5.8,5.6,3.9,5.7,6.1,6.4,12.1,13.9,15.4,15.6,13.3,9.2,6.6,3.4,0.7,6.4,4.7,9.0,10.1,15.4,16.2,14.5,12.3,7.9,7.4,4.6,5.6,4.7,5.8,7.4,10.4,14.3,14.6,15.5,12.7,10.8,3.1,4.8,4.9,4.7,5.2,8.2,10.6,13.2,18.7,15.8,12.9,7.8,7.9,7.7,5.1,0.6,3.4,7.6,10.9,14.3,14.9,14.7,12.3,10.1,5.2,1.3,3.6,4.3,6.7,9.2,10.3,13.2,15.4,15.2,14.4,9.4,4.9,5.1,2.4,1.7,3.3,7.1,8.8,13.3,16.8,15.7,13.2,9.7,5.3,2.4,3.7,5.3,4.4,8.1,9.4,13.5,15.3,16.9,12.5,10.7,4.8,4.4,2.6,4.3,5.3,7.5,11.3,15.8,16.4,17.4,14.5,11.3,7.2,7.3,3.4,1.8,4.9,7.7,10.8,16.8,14.8,15.8,14.7,9.6,4.3,4.8,4.9,5.7,7.3,7.5,11.7,14.8,18.3,16.2,12.9,9.6,4.8,1.6,3.5,1.7,4.7,5.9,11.5,12.3,14.5,13.6,11.2,9.8,4.5,1.5,1.7,4.9,6.3,7.5,10.4,14.8,15.0,15.9,13.1,11.8,4.2,3.9,3.9,5.1,5.5,8.8,12.3,12.7,14.2,14.6,13.0,10.3,3.2,6.5,4.9,5.8,6.5,8.8,10.6,13.6,15.2,15.5,11.6,9.9,7.3,6.3,2.4,2.3,4.7,8.8,12.4,13.6,15.5,14.2,13.3,9.8,5.5,3.7,2.1,2.3,2.9,10.6,12.6,15.6,16.6,15.1,16.3,9.7,6.7,5.8,5.8,4.4,4.8,8.6,10.0,15.5,15.5,14.7,12.8,10.7,6.9,6.1,1.2,6.9,3.1,9.3,11.4,14.1,14.9,16.3,13.6,9.3,4.8,3.4,3.9,6.3,6.8,8.7,13.5,15.5,18.3,16.8,14.3,8.4,4.9,7.2,5.6,7.5,3.8,10.1,9.6,13.2,17.3,15.5,14.4,9.7,5.8,2.9,3.3,2.8,4.7,9.2,11.7,15.2,17.5,15.7,12.9,9.5,4.7,0.6,0.5,6.1,7.3,8.7,11.3,12.8,15.2,17.2,12.7,9.8,3.4,3.6,5.0,6.9,6.8,8.2,9.7,14.1,17.1,15.3,13.2,8.4,7.0,5
.3,5.2,1.8,5.4,7.7,9.9,14.2,16.2,15.4,11.8,8.6,6.3,5.3,5.5,3.9,6.7,9.8,10.0,13.9,17.3,15.1,13.6,10.4,5.6,0.2,6.4,2.3,5.1,8.6,12.3,14.2,14.8,16.1,14.9,8.9,5.4,4.2,3.2,4.8,4.5,7.9,9.6,14.3,17.2,16.5,12.7,11.3,6.1,6.0,5.5,6.2,4.9,7.0,9.1,15.2,14.7,15.2,11.2,9.3,7.1,4.7,4.6,5.6,5.4,8.9,11.8,15.1,16.6,16.2,13.3,10.2,3.5,0.3,0.7,3.1,4.7,5.7,8.9,12.9,13.6,14.5,12.6,8.9,4.1,0.7,0.9,5.8,6.2,7.9,10.4,13.8,15.5,16.4,14.6,7.1,5.4,5.1,1.5,3.2,5.3,7.3,11.8,13.7,16.2,13.9,12.7,7.3,8.9,3.9,5.2,6.1,7.4,8.4,11.5,13.1,15.2,14.9,12.1,9.9,5.7,3.9,4.7,5.9,1.8,8.1,10.6,13.9,14.5,15.3,13.3,9.7,5.8,4.6,6.5,5.3,6.5,7.2,11.3,14.1,16.3,17.2,14.5,9.4,5.3,4.4,2.9,5.8,4.5,7.7,8.9,13.9,16.3,13.6,12.2,7.5,5.9,3.7,2.1,1.5,4.2,7.6,10.3,13.5,15.9,15.8,13.6,11.3,6.6,1.9,2.4,3.8,3.3,6.2,9.4,15.3,17.3,15.7,11.8,7.1,4.4,2.6,3.2,1.8,3.1,6.2,10.7,13.2,13.7,14.1,12.2,7.9,7.7,4.9,3.4,2.9,4.7,7.1,12.9,15.3,15.3,14.7,12.8,8.6,6.9,3.3,5.7,3.1,6.2,7.1,11.7,13.5,14.5,14.1,14.6,9.4,5.7,0.8,1.3,3.9,3.8,6.2,9.5,14.7,15.1,14.1,14.2,9.4,5.6,4.1,2.3,3.6,2.7,7.3,11.6,13.4,14.3,15.2,12.4,7.1,6.4,1.8,2.2,4.7,7.2,10.3,13.1,15.6,16.4,17.4,12.9,9.9,5.2,4.8,3.4,5.1,6.7,9.7,9.2,13.5,15.9,14.2,11.6,9.3,7.9,5.1,0.2,1.8,5.1,8.2,12.4,14.8,15.2,15.8,15.4,7.1,7.5,3.9,4.8,4.6,6.7,9.1,11.9,16.2,16.2,14.3,13.1,6.9,4.3,3.9,1.6,5.8,6.5,7.1,10.0,15.1,16.5,16.2,12.1,9.9,7.6,4.7,6.6,4.8,4.3,8.5,10.2,13.6,15.3,16.5,15.2,11.3,7.2,7.3,4.9,5.1,5.1,7.8,9.9,15.7,17.3,17.8,13.2,8.8,8.5,2.2,4.4,2.6,3.7,8.3,10.3,14.7,17.7,15.1,13.6,9.8,7.3,7.2,3.5,2.3,4.1,8.6,11.5,13.9,18.0,15.6,13.9,9.7,4.8,3.4,4.7,1.5,6.7,7.5,8.9,13.9,14.7,14.3,12.8,9.6,6.8,4.6,4.2,7.1,7.1,6.4,11.1,13.0,15.3,14.3,13.1,10.5,6.4,3.3,4.1,3.4,4.3,8.7,10.9,13.3,17.1,15.1,12.5,9.7,5.2,3.7,3.6,5.2,6.8,7.3,10.8,14.7,17.2,14.7,12.4,7.1,4.9,4.9,5.3,3.1,5.1,7.3,10.5,14.3,15.8,16.7,13.9,10.9,7.3,3.0,3.6,2.8,6.3,7.6,10.5,12.4,14.1,14.3,13.6,9.8,6.5,4.6,2.5,5.3,4.3,6.0,12.4,14.3,15.8,14.6,12.9,12.9,7.4,3.9,3.5,2.9,3.7,8.7,11.0,11.8,14.6,15.4,11.9,10.4,4.8,3.9,3.5,5.1,5.6,7.3,11.2,14.7,14.2,15.2,
12.5,10.6,3.2,6.4,3.8,4.8,5.2,7.5,12.9,14.5,18.2,18.2,13.9,9.3,6.1,6.2,3.6,5.4,7.2,8.8,12.1,13.9,16.1,12.9,11.1,8.2,6.3,6.7,4.5,4.8,6.2,7.9,11.4,14.3,14.6,15.2,14.1,10.9,8.4,5.1,3.7,6.8,6.1,9.8,10.8,14.5,15.8,16.1,13.3,10.3,6.8,4.6,4.1,4.3,5.2,7.9,10.8,14.4,14.6,15.3,13.4,9.1,2.8,5.3,7.5,3.8,3.3,8.2,11.6,11.8,15.3,16.4,13.0,10.6,6.8,1.9,1.6,0.9,3.2,5.4,12.8,15.2,16.1,15.3,14.0,7.5,7.8,2.3,3.8,6.5,5.7,6.7,13.0,13.3,15.4,16.1,11.9,9.3,5.5,6.9,2.9,1.9,3.6,7.1,13.5,14.3,13.9,15.7,12.7,7.4,3.3,5.5,5.2,6.0,7.2,8.2,11.8,14.4,14.1,13.6,13.0,10.4,6.8,4.2,7.3,4.8,7.4,8.0,11.5,14.7,18.5,15.4,14.1,12.8,4.6,6.5,3.7,4.4,4.6,5.5,12.7,13.8,13.7,13.6,12.2,8.2,5.9,5.8,5.6,5.6,6.5,7.6,9.2,12.5,17.5,15.2,12.5,9.7,3.3,3.8,4.7,3.3,4.1,6.9,11.6,13.9,15.3,14.1,13.3,10.1,7.1,6.8,5.3,5.2,4.9,7.5,11.6,15.0,16.8,15.4,11.5,10.4,3.6,2.8,4.6,6.8,6.3,9.3,10.2,13.6,17.1,16.2,14.4,8.1,5.9,4.2,4.6,3.9,7.3,7.9,11.2,12.6,15.9,15.7,12.5,10.5,6.2,2.1,5.2,5.8,6.3,8.5,10.9,12.9,16.1,15.3,12.8,10.1,7.6,3.4,1.3,0.4,6.2,6.8,11.3,13.3,16.0,15.4,15.3,9.6,6.7,5.8,5.6,2.5,5.3,8.3,10.7,15.3,15.2,15.7,13.6,10.5,6.2,4.3,3.2,3.9,4.2,7.7,11.4,14.4,15.3,14.4,11.5,8.8,7.8,5.3,6.3,2.9,4.7,6.9,10.5,14.1,16.1,17.1,12.9,8.8,6.6,5.7,2.2,4.3,7.3,8.8,12.2,15.6,17.8,17.6,14.9,10.1,5.6,1.6,4.1,3.8,4.8,8.0,11.3,14.9,18.2,15.4,14.6,10.6,6.1,8.1,4.5,5.8,6.6,8.2,9.9,15.1,17.1,16.6,13.6,9.5,6.9,2.8,3.7,2.6,7.1,6.3,11.5,14.7,15.3,16.1,14.4,9.4,5.5,5.3,5.2,5.6,3.6,9.2,12.2,14.1,16.1,16.9,13.4,10.4,5.1,3.0,5.7,5.1,9.1,7.6,10.7,14.4,15.2,16.3,13.8,10.5,9.4,4.4,4.2,5.6,5.8,8.8,11.4,14.2,15.5,16.4,14.2,8.2,8.7,3.2,1.4,2.6,6.0,8.7,12.5,16.4,15.1,15.6,12.8,9.6,6.9,3.8,0.5,3.5,5.1,6.4,9.4,15.1,17.3,14.7,14.5,10.4,6.6,5.6,0.9,0.1,5.2,9.2,11.1,14.4,15.5,16.6,13.6,10.4,4.9,6.7,4.9,6.1,6.5,10.5,11.8,14.4,16.4,16.1,13.3,10.6,6.3,3.5,5.8,3.6,5.2,10.2,11.4,13.5,16.5,17.0,12.5,9.3,6.2,3.6,0.4,7.1,7.9,10.1,12.2,14.6,16.7,15.9,14.4,11.9,7.2,4.9,2.7,5.9,5.1,9.9,10.7,13.1,16.3,14.7,14.0,9.8,8.1,3.1,2.2,1.9,3.6,8.6,13.5,15.5,17.0,18.6,14.9,10.6,7.2,5.1,5.
4,4.7,8.3,9.0,11.4,13.5,15.8,15.1,13.8,10.1,7.3,5.7,5.5,5.7,5.1,10.0,11.2,15.3,17.4,16.8,16.3,11.7,6.6,5.8,4.2,5.3,7.4,7.6,11.3,16.2,15.9,15.6,12.9,9.6,5.7,1.2,3.9,3.7,4.1,6.8,10.1,14.0,16.3,14.8,14.1,9.4,8.5,5.5,2.7,3.4,6.6,9.6,13.4,14.4,16.8,15.8,10.7,8.8,4.1,2.8,3.3,4.3,5.6,7.3,12.6,14.4,15.5,16.2,13.8,9.7,8.5,6.9,2.9,2.6,5.8,7.6,11.2,13.4,14.2,14.6,12.7,11.9,6.9,6.8,2.6,1.2,3.2,9.3,9.7,13.8,17.7,18.1,14.2,9.2,7.0,5.4,3.6,0.2,6.2,6.9,11.7,13.1,15.8,13.5,14.3,9.4,6.0,5.7,5.5,5.3,9.2,8.9,10.3,15.2,16.3,15.4,12.5,10.8,6.4,4.5,3.4,4.7,3.7,7.4,11.1,14.1,15.9,15.8,15.1,10.8,6.4,4.7,1.6,4.4,7.3,9.4,12.8,15.2,17.3,17.2,14.9,12.6,7.1,6.0,3.8,4.1,6.4,8.9,12.8,16.1,15.1,15.0,13.1,10.3,7.3,3.9,3.9,6.9,8.2,10.0,11.0,14.4,15.2,15.4,15.2,10.9,6.0,2.2,4.3,4.4,2.8,7.7,10.3,13.7,15.1,14.5,12.6,10.4,5.5,1.8,2.1,0.7,6.0,8.7,10.6,14.9,15.2,14.3,12.9,11.1,8.2,2.6,3.4,4.5,4.3,8.7,13.3,13.8,16.1,15.5,14.1,8.9,7.4,3.6,3.3,3.1,5.2,8.0,11.7,14.7,14.0,14.9,12.3,11.0,4.5,4.7,2.9,5.7,6.5,7.2,11.0,15.4,15.0,14.7,13.8,10.1,5.6,5.5,4.5,5.4,7.0,7.7,10.4,14.0,16.7,15.7,13.5,10.8,5.4,4.2,4.4,1.9,6.3,8.1,9.8,14.8,15.0,15.4,13.9,12.5,6.5,3.0,5.5,1.0,3.3,7.4,11.2,13.9,16.8,16.4,13.9,13.0,5.4,3.3,3.7,2.9,3.7,6.7,13.0,16.4,15.2,16.0,14.4,10.7,7.8,4.3]; // spot test against values taken from the reference implementation let test = asap_smooth(&data, 100); assert_eq!(test.len(), 93); assert!((test[10] - 9.021034).abs() < 0.000001); assert!((test[20] - 9.19).abs() < 0.000001); assert!((test[30] - 9.068966).abs() < 0.000001); assert!((test[40] - 9.237586).abs() < 0.000001); assert!((test[50] - 9.145172).abs() < 0.000001); assert!((test[60] - 9.014483).abs() < 0.000001); assert!((test[70] - 9.293448).abs() < 0.000001); assert!((test[80] - 9.417931).abs() < 0.000001); assert!((test[90] - 9.602069).abs() < 0.000001); } } ================================================ FILE: crates/count-min-sketch/Cargo.toml ================================================ [package] name = "countminsketch" version = "0.1.0" 
edition = "2021" [dependencies] rand = "0.8.4" serde = { version = "1.0", features = ["derive"] } ================================================ FILE: crates/count-min-sketch/src/lib.rs ================================================ //! Count-Min Sketch implementation in Rust //! //! Based on the paper: //! use std::{ fmt, hash::{Hash, Hasher}, }; #[allow(deprecated)] use std::hash::SipHasher; use serde::{Deserialize, Serialize}; /// The CountMinHashFn is a data structure used to hash items that are being /// added to a Count-Min Sketch. #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] #[repr(C)] pub struct CountMinHashFn { key: u64, } const SEED: u64 = 0x517cc1b727220a95; // from FxHash impl CountMinHashFn { /// Creates a new CountMinHashFn whose hash function key is equal to `key`. pub fn with_key(key: u64) -> Self { Self { key } } /// Computes the hash of `item` according to the hash function and returns /// the bucket index corresponding to the hashed value. /// /// The returned value will be between 0 and (`nbuckets` - 1). #[allow(deprecated)] pub fn hash_into_buckets(&self, item: &T, nbuckets: usize) -> usize { let (key1, key2) = (self.key, SEED); let mut hasher = SipHasher::new_with_keys(key1, key2); item.hash(&mut hasher); let hash_val = hasher.finish(); (hash_val % (nbuckets as u64)) as usize } /// Returns the key for the hash function. pub(crate) fn key(&self) -> u64 { self.key } } /// The Count-Min Sketch is a compact summary data structure capable of /// representing a high-dimensional vector and answering queries on this vector, /// in particular point queries and dot product queries, with strong accuracy /// guarantees. Such queries are at the core of many computations, so the /// structure can be used in order to answer a variety of other queries, such as /// frequent items (heavy hitters), quantile finding, join size estimation, and /// more. 
Since the data structure can easily process updates in the form of /// additions or subtractions to dimensions of the vector (which may correspond /// to insertions or deletions, or other transactions), it is capable of working /// over streams of updates, at high rates.[1] /// /// [1]: #[derive(Clone, Debug, Serialize, Deserialize)] pub struct CountMinSketch { width: usize, depth: usize, // hashfuncs must be at least `depth` in length hashfuncs: Vec, // The outer and inner `Vec`s must be `depth` and `width` long, respectively counters: Vec>, } impl CountMinSketch { /// Constructs a new Count-Min Sketch with the specified dimensions, using /// `hashfuncs` to construct the underlying hash functions and `counters` to /// populate the sketch with any data. pub fn new( width: usize, depth: usize, hashfuncs: Vec, counters: Vec>, ) -> Self { assert_eq!(hashfuncs.len(), depth); assert_eq!(counters.len(), depth); assert_eq!(counters[0].len(), width); Self { width, depth, hashfuncs, counters, } } /// Constructs a new, empty Count-Min Sketch with the specified dimensions, /// using `keys` to seed the underlying hash functions. pub fn with_dims_and_hashfn_keys(width: usize, depth: usize, keys: Vec) -> Self { assert_eq!(keys.len(), depth); Self { width, depth, hashfuncs: keys .iter() .map(|key| CountMinHashFn::with_key(*key)) .collect(), counters: vec![vec![0; width]; depth], } } /// Constructs a new, empty Count-Min Sketch with the specified dimensions. pub fn with_dim(width: usize, depth: usize) -> Self { let keys = (1..=depth).map(|k| k as u64).collect::>(); CountMinSketch::with_dims_and_hashfn_keys(width, depth, keys) } /// Constructs a new, empty Count-Min Sketch whose dimensions will be /// derived from the parameters. 
/// /// Then for any element *i*, an estimate of its count, âᵢ, will have the /// guarantee: /// aᵢ ≤ âᵢ ≤ aᵢ + ϵN with probability 1-δ /// where aᵢ is the true count of element *i* /// /// Thus `epsilon` controls the error of the estimated count, relative to /// the total number of items seen, and `delta` determines the probability /// that the estimate will exceed the true count beyond the epsilon error /// term. /// /// To accommodate this result, the sketch will have a width of ⌈e/ε⌉ and a /// depth of ⌈ln(1/δ)⌉. pub fn with_prob(epsilon: f64, delta: f64) -> Self { assert!(0.0 < epsilon && epsilon < 1.0); assert!(0.0 < delta && delta < 1.0); let width = (1f64.exp() / epsilon).ceil() as usize; let depth = (1f64 / delta).ln().ceil() as usize; CountMinSketch::with_dim(width, depth) } /// Returns the width of the sketch. pub fn width(&self) -> usize { self.width } /// Returns the depth of the sketch. pub fn depth(&self) -> usize { self.depth } /// Returns a vector containing the keys of the hash functions used with the /// sketch. pub fn hash_keys(&self) -> Vec { self.hashfuncs.iter().map(|f| f.key()).collect() } /// Returns a nested vector representing the sketch's counter table. Each /// element in the outer vector corresponds to a row of the counter table, /// and each element of the inner vector corresponds to the tally in that /// bucket for a given row. pub fn counters(&self) -> &Vec> { &self.counters } /// Returns an estimate of the number of times `item` has been seen by the /// sketch. pub fn estimate(&self, item: T) -> i64 { let buckets = self .hashfuncs .iter() .map(|h| h.hash_into_buckets(&item, self.width)); self.counters .iter() .zip(buckets) .map(|(counter, bucket)| counter[bucket]) .min() .unwrap() } /// Returns a vector of the indices for the buckets into which `item` hashes. /// /// The vector will have `self.depth` elements, each in the range /// [0, self.width-1]. 
pub fn get_bucket_indices(&self, item: T) -> Vec { self.hashfuncs .iter() .map(|h| h.hash_into_buckets(&item, self.width)) .collect() } /// Adds the given `item` to the sketch. pub fn add_value(&mut self, item: T) { for i in 0..self.depth { let bucket = self.hashfuncs[i].hash_into_buckets(&item, self.width); self.counters[i][bucket] += 1; } } /// Subtract the given `item` from the sketch. pub fn subtract_value(&mut self, item: T) { for i in 0..self.depth { let bucket = self.hashfuncs[i].hash_into_buckets(&item, self.width); self.counters[i][bucket] -= 1; } } /// Includes the counts from `other` into `self` via elementwise addition of /// the counter vectors. /// /// The underlying `CountMinHashFn`s in each sketch must have the same keys. pub fn combine(&mut self, other: CountMinSketch) { assert_eq!(self.width, other.width); assert_eq!(self.depth, other.depth); assert_eq!(self.hashfuncs, other.hashfuncs); for (counter1, counter2) in self.counters.iter_mut().zip(other.counters) { for (val1, val2) in counter1.iter_mut().zip(counter2) { *val1 += val2; } } } } impl fmt::Display for CountMinSketch { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "Count-Min Sketch:")?; write!(f, "+------++")?; for _ in 0..self.width { write!(f, "--------+")?; } writeln!(f)?; write!(f, "| ||")?; for b in 0..self.width { write!(f, " {b:>3} |")?; } writeln!(f)?; write!(f, "+======++")?; for _ in 0..self.width { write!(f, "========+")?; } writeln!(f)?; for n in 0..self.depth { write!(f, "| {n:>3} ||")?; for x in &self.counters[n] { write!(f, " {x:>6} |")?; } writeln!(f)?; } write!(f, "+------++")?; for _ in 0..self.width { write!(f, "--------+")?; } writeln!(f) } } ================================================ FILE: crates/count-min-sketch/tests/lib.rs ================================================ use countminsketch::CountMinSketch; #[test] fn empty_sketch() { let cms = CountMinSketch::with_dim(1, 1); assert_eq!(cms.estimate("foo"), 0); } #[test] fn add_once() { 
    let mut cms = CountMinSketch::with_dim(2, 2);
    cms.add_value("foo");
    assert_eq!(cms.estimate("foo"), 1);
}

#[test]
fn subtract_is_inverse_of_add() {
    let mut cms = CountMinSketch::with_dim(2, 2);
    cms.add_value("foo");
    cms.subtract_value("foo");
    assert_eq!(cms.estimate("foo"), 0);
}

#[test]
fn add_repeated() {
    // no collisions possible with a single distinct item, so the estimate is exact
    let mut cms = CountMinSketch::with_dim(2, 2);
    for _ in 0..100_000 {
        cms.add_value("foo");
    }
    assert_eq!(cms.estimate("foo"), 100_000);
}

#[test]
fn add_repeated_with_collisions() {
    // if sketch has width = 2 and we add 3 items, then we
    // are guaranteed that we will have at least one hash
    // collision in every row
    let mut cms = CountMinSketch::with_dim(2, 5);
    for _ in 0..100_000 {
        cms.add_value("foo")
    }
    for _ in 0..1_000 {
        cms.add_value("bar")
    }
    for _ in 0..1_000_000 {
        cms.add_value("baz")
    }
    let foo_est = cms.estimate("foo");
    let bar_est = cms.estimate("bar");
    let baz_est = cms.estimate("baz");
    // estimates may only overshoot the true count; allow 1% of total insertions
    let err_margin = (0.01 * (100_000f64 + 1_000f64 + 1_000_000f64)) as i64;
    assert!(100_000 <= foo_est && foo_est < (100_000 + err_margin));
    assert!(1_000 <= bar_est && bar_est < (1_000 + err_margin));
    assert!(1_000_000 <= baz_est && baz_est < (1_000_000 + err_margin));
}

================================================ FILE: crates/counter-agg/Cargo.toml ================================================

[package]
name = "counter-agg"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
flat_serialize = {path="../flat_serialize/flat_serialize"}
flat_serialize_macro = {path="../flat_serialize/flat_serialize_macro"}
serde = { version = "1.0", features = ["derive"] }
stats_agg = {path="../stats-agg"}
tspoint = {path="../tspoint"}

[dev-dependencies]
approx = "0.5.1"

================================================ FILE: crates/counter-agg/src/lib.rs ================================================

use serde::{Deserialize, Serialize};
use stats_agg::{stats2d::StatsSummary2D, XYPair};
use
std::fmt;
use tspoint::TSPoint;

// NOTE(review): restored — the `Option<I64Range>` types below were stripped to
// bare `Option` in extraction; `I64Range` must be in scope from the `range`
// module for them to resolve.
use crate::range::I64Range;

pub mod range;

#[cfg(test)]
mod tests;

/// Errors produced by counter/gauge aggregation.
#[derive(Debug, PartialEq, Eq)]
pub enum CounterError {
    /// Points were submitted out of time order.
    OrderError,
    /// Bounds are missing, invalid, or infinite where finite bounds are needed.
    BoundsInvalid,
}

// TODO Intent is for this to be immutable with mutations going through (and
// internal consistency protected by) the builders below.  But, we allow raw
// access to the extension to allow it to (de)serialize, so the separation is
// but a fiction for now.  If the only consequence of corruption is
// nonsensical results rather than unsound behavior, garbage in garbage out.
// But much better if we can validate at deserialization.  We can do that in
// the builder if we want.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct MetricSummary {
    // TODO invariants?
    pub first: TSPoint,
    pub second: TSPoint,
    pub penultimate: TSPoint,
    pub last: TSPoint,
    // Invariants:
    // - num_changes > 0 if num_resets > 0
    // - reset_sum > 0 if num_resets > 0
    // - num_resets > 0 if reset_sum > 0
    pub reset_sum: f64,
    pub num_resets: u64,
    pub num_changes: u64,
    // TODO Protect from deserialization?  Is there any risk other than giving
    // nonsensical results?  If so, maybe it's fine to just accept garbage
    // out upon garbage in.
    pub stats: StatsSummary2D,
    // TODO See TODOs in I64Range about protecting from deserialization.
    pub bounds: Option<I64Range>,
}

// Note that this can lose fidelity with the timestamp, but it would only lose it in the microseconds,
// this is likely okay in most applications. However, if you need better regression analysis at the subsecond level,
// you can always subtract a common near value from all your times, then add it back in, the regression analysis will be unchanged.
// Note that we convert the timestamp into seconds rather than microseconds here so that the slope, and any other regression analysis, is done on a per-second basis.
// For instance, the slope will be the per-second slope, not the per-microsecond slope. The x intercept value will need to be
// converted back to microseconds so you get a timestamp out.
fn ts_to_xy(pt: TSPoint) -> XYPair {
    XYPair {
        x: to_seconds(pt.ts as f64),
        y: pt.val,
    }
}

fn to_seconds(t: f64) -> f64 {
    t / 1_000_000_f64 // by default postgres timestamps have microsecond precision
}

/// MetricSummary tracks monotonically increasing counters that may reset, ie every time the value decreases
/// it is treated as a reset of the counter and the previous value is added to the "true value" of the
/// counter at that timestamp.
impl MetricSummary {
    /// Creates a summary seeded with a single point; all sentinel points
    /// (first/second/penultimate/last) start equal to `pt`.
    pub fn new(pt: &TSPoint, bounds: Option<I64Range>) -> MetricSummary {
        let mut n = MetricSummary {
            first: *pt,
            second: *pt,
            penultimate: *pt,
            last: *pt,
            reset_sum: 0.0,
            num_resets: 0,
            num_changes: 0,
            stats: StatsSummary2D::new(),
            bounds,
        };
        n.stats.accum(ts_to_xy(*pt)).unwrap();
        n
    }

    /// Records a counter reset if `incoming` is below the last seen value:
    /// the pre-reset value is folded into `reset_sum`.
    fn reset(&mut self, incoming: &TSPoint) {
        if incoming.val < self.last.val {
            self.reset_sum += self.last.val;
            self.num_resets += 1;
        }
    }

    // expects time-ordered input
    fn add_point(&mut self, incoming: &TSPoint) -> Result<(), CounterError> {
        if incoming.ts < self.last.ts {
            return Err(CounterError::OrderError);
        }
        //TODO: test this
        if incoming.ts == self.last.ts {
            // if two points are equal we only use the first we see
            // see discussion at https://github.com/timescale/timescaledb-toolkit/discussions/65
            return Ok(());
        }
        // right now we treat a counter reset that goes to exactly zero as a change (not sure that's correct, but it seems defensible)
        // These values are not rounded, so direct comparison is valid.
if incoming.val != self.last.val {
            self.num_changes += 1;
        }
        // first == second means we have only seen one point so far, so
        // `incoming` becomes the second point
        if self.first == self.second {
            self.second = *incoming;
        }
        self.penultimate = self.last;
        self.last = *incoming;
        // accumulate into the regression stats with resets added back in, so
        // the regression sees the "true" (monotonic) counter value
        let mut incoming_xy = ts_to_xy(*incoming);
        incoming_xy.y += self.reset_sum;
        self.stats.accum(incoming_xy).unwrap();
        Ok(())
    }

    /// True when the summary still holds only its initial point.
    fn single_value(&self) -> bool {
        self.last == self.first
    }

    // combining can only happen for disjoint time ranges
    fn combine(&mut self, incoming: &MetricSummary) -> Result<(), CounterError> {
        // this requires that self comes before incoming in time order
        if self.last.ts >= incoming.first.ts {
            return Err(CounterError::OrderError);
        }
        // These values are not rounded, so direct comparison is valid.
        if self.last.val != incoming.first.val {
            self.num_changes += 1;
        }
        if incoming.single_value() {
            self.penultimate = self.last;
        } else {
            self.penultimate = incoming.penultimate;
        }
        if self.single_value() {
            self.second = incoming.first;
        }
        let mut stats = incoming.stats;
        // have to offset based on our reset_sum, including the amount we added based on any resets that happened at the boundary (but before we add in the incoming reset_sum)
        stats
            .offset(XYPair {
                x: 0.0,
                y: self.reset_sum,
            })
            .unwrap();
        self.last = incoming.last;
        self.reset_sum += incoming.reset_sum;
        self.num_resets += incoming.num_resets;
        self.num_changes += incoming.num_changes;
        self.stats = self.stats.combine(stats).unwrap();
        self.bounds_extend(incoming.bounds);
        Ok(())
    }

    /// Elapsed time between the first and last points, in seconds.
    pub fn time_delta(&self) -> f64 {
        to_seconds((self.last.ts - self.first.ts) as f64)
    }

    /// Total increase of the counter over the summary, resets included.
    pub fn delta(&self) -> f64 {
        self.last.val + self.reset_sum - self.first.val
    }

    /// Average per-second rate over the whole summary; `None` if only a single
    /// point has been seen (no elapsed time to divide by).
    pub fn rate(&self) -> Option<f64> {
        if self.single_value() {
            return None;
        }
        Some(self.delta() / self.time_delta())
    }

    /// Instantaneous delta at the left edge (first → second point).
    pub fn idelta_left(&self) -> f64 {
        //check for counter reset
        if self.second.val >= self.first.val {
            self.second.val - self.first.val
        } else {
            self.second.val // counter reset assumes it reset at the previous point, so we just return the second point
        }
    }

    /// Instantaneous delta at the right edge (penultimate → last point).
    pub fn idelta_right(&self) -> f64 {
//check for counter reset if self.last.val >= self.penultimate.val { self.last.val - self.penultimate.val } else { self.last.val } } pub fn irate_left(&self) -> Option { if self.single_value() { None } else { Some(self.idelta_left() / to_seconds((self.second.ts - self.first.ts) as f64)) } } pub fn irate_right(&self) -> Option { if self.single_value() { None } else { Some(self.idelta_right() / to_seconds((self.last.ts - self.penultimate.ts) as f64)) } } pub fn bounds_valid(&self) -> bool { match self.bounds { None => true, // unbounded contains everything Some(b) => b.contains(self.last.ts) && b.contains(self.first.ts), } } fn bounds_extend(&mut self, in_bounds: Option) { match (self.bounds, in_bounds) { (None, _) => self.bounds = in_bounds, (_, None) => {} (Some(mut a), Some(b)) => { a.extend(&b); self.bounds = Some(a); } }; } // based on: https://github.com/timescale/promscale_extension/blob/d51a0958442f66cb78d38b584a10100f0d278298/src/lib.rs#L208, // which is based on: // https://github.com/prometheus/prometheus/blob/e5ffa8c9a08a5ee4185271c8c26051ddc1388b7a/promql/functions.go#L59 pub fn prometheus_delta(&self) -> Result, CounterError> { if self.bounds.is_none() || !self.bounds_valid() || self.bounds.unwrap().has_infinite() { return Err(CounterError::BoundsInvalid); } //must have at least 2 values if self.single_value() || self.bounds.unwrap().is_singleton() { //technically, the is_singleton check is redundant, it's included for clarity (any singleton bound that is valid can only be one point) return Ok(None); } let mut result_val = self.delta(); // all calculated durations in seconds in Prom implementation, so we'll do that here. // we can unwrap all of the bounds accesses as they are guaranteed to be there from the checks above let mut duration_to_start = to_seconds((self.first.ts - self.bounds.unwrap().left.unwrap()) as f64); /* bounds stores [L,H), but Prom takes the duration using the inclusive range [L, H-1ms]. Subtract an extra ms, ours is in microseconds. 
*/ let duration_to_end = to_seconds((self.bounds.unwrap().right.unwrap() - self.last.ts - 1_000) as f64); let sampled_interval = self.time_delta(); let avg_duration_between_samples = sampled_interval / (self.stats.n - 1) as f64; // don't have to worry about divide by zero because we know we have at least 2 values from the above. // we don't want to extrapolate to negative counter values, so we calculate the duration to the zero point of the counter (based on what we know here) and set that as duration_to_start if it's smaller than duration_to_start if result_val > 0.0 && self.first.val >= 0.0 { let duration_to_zero = sampled_interval * (self.first.val / result_val); if duration_to_zero < duration_to_start { duration_to_start = duration_to_zero; } } // If the first/last samples are close to the boundaries of the range, // extrapolate the result. This is as we expect that another sample // will exist given the spacing between samples we've seen thus far, // with an allowance for noise. // Otherwise, we extrapolate to one half the avg distance between samples... 
// this was empirically shown to be good for certain things and was discussed at length in: https://github.com/prometheus/prometheus/pull/1161
        let extrapolation_threshold = avg_duration_between_samples * 1.1;
        let mut extrapolate_to_interval = sampled_interval;
        if duration_to_start < extrapolation_threshold {
            extrapolate_to_interval += duration_to_start
        } else {
            extrapolate_to_interval += avg_duration_between_samples / 2.0
        }
        if duration_to_end < extrapolation_threshold {
            extrapolate_to_interval += duration_to_end
        } else {
            extrapolate_to_interval += avg_duration_between_samples / 2.0
        }
        result_val *= extrapolate_to_interval / sampled_interval;
        Ok(Some(result_val))
    }

    /// Prometheus-style extrapolated rate: [`Self::prometheus_delta`] divided
    /// by the (inclusive) bounds duration.
    pub fn prometheus_rate(&self) -> Result<Option<f64>, CounterError> {
        let delta = self.prometheus_delta()?;
        if delta.is_none() {
            return Ok(None);
        }
        let delta = delta.unwrap();
        let bounds = self.bounds.unwrap(); // if we got through delta without error then we have bounds
        // bounds stores [L,H), but Prom takes the duration using the inclusive range [L, H-1ms]. So subtract an extra ms from the duration
        let duration = bounds.duration().unwrap() - 1_000;
        if duration <= 0 {
            return Ok(None); // if we have a total duration under a ms, it's less than prom could deal with so we return none.
        }
        // don't have to deal with 0 case because that is checked in delta as well (singleton)
        Ok(Some(delta / to_seconds(duration as f64)))
    }
}

impl fmt::Display for CounterError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
        match self {
            CounterError::OrderError => write!(
                f,
                "out of order points: points must be submitted in time-order"
            ),
            CounterError::BoundsInvalid => write!(f, "cannot calculate delta without valid bounds"),
        }
    }
}

/// Builder for gauge aggregation: like a counter but decreases are NOT treated
/// as resets, so points are added without calling `reset`.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct GaugeSummaryBuilder(MetricSummary);

impl GaugeSummaryBuilder {
    pub fn new(pt: &TSPoint, bounds: Option<I64Range>) -> Self {
        Self(MetricSummary::new(pt, bounds))
    }

    /// expects time-ordered input
    pub fn add_point(&mut self, incoming: &TSPoint) -> Result<(), CounterError> {
        self.0.add_point(incoming)
    }

    /// combining can only happen for disjoint time ranges
    pub fn combine(&mut self, incoming: &MetricSummary) -> Result<(), CounterError> {
        self.0.combine(incoming)
    }

    pub fn set_bounds(&mut self, bounds: Option<I64Range>) {
        self.0.bounds = bounds;
    }

    pub fn build(self) -> MetricSummary {
        self.0
    }

    pub fn first(&self) -> &TSPoint {
        &self.0.first
    }

    // TODO build method should check validity rather than caller
    pub fn bounds_valid(&self) -> bool {
        self.0.bounds_valid()
    }
}

impl From<MetricSummary> for GaugeSummaryBuilder {
    fn from(summary: MetricSummary) -> Self {
        Self(summary)
    }
}

/// Builder for counter aggregation: decreases are treated as counter resets
/// (see [`MetricSummary::reset`]) before the point/summary is folded in.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct CounterSummaryBuilder(MetricSummary);

impl CounterSummaryBuilder {
    pub fn new(pt: &TSPoint, bounds: Option<I64Range>) -> Self {
        Self(MetricSummary::new(pt, bounds))
    }

    /// expects time-ordered input
    pub fn add_point(&mut self, incoming: &TSPoint) -> Result<(), CounterError> {
        self.0.reset(incoming);
        self.0.add_point(incoming)
    }

    /// combining can only happen for disjoint time ranges
    pub fn combine(&mut self, incoming: &MetricSummary) -> Result<(), CounterError> {
        self.0.reset(&incoming.first);
        self.0.combine(incoming)
    }

    pub fn set_bounds(&mut self, bounds: Option<I64Range>) {
        self.0.bounds = bounds;
    }

    pub fn build(self) -> MetricSummary {
        self.0
    }

    pub fn first(&self) -> &TSPoint {
        &self.0.first
    }

    // TODO build method should check validity rather than caller
    pub fn bounds_valid(&self) -> bool {
        self.0.bounds_valid()
    }
}

impl From<MetricSummary> for CounterSummaryBuilder {
    fn from(summary: MetricSummary) -> Self {
        Self(summary)
    }
}

================================================ FILE: crates/counter-agg/src/range.rs ================================================

use serde::{Deserialize, Serialize};
use std::cmp::{max, min};

// we always store ranges as half open, inclusive on left, exclusive on right,
// we are a discrete type so translating is simple [), this enforces equality
// between ranges like [0, 10) and [0, 9]
// None values denote infinite bounds on that side
#[derive(Debug, PartialEq, Eq, Copy, Clone, Serialize, Deserialize)]
#[repr(C)]
pub struct I64Range {
    pub left: Option<i64>,
    pub right: Option<i64>,
}

impl I64Range {
    /// True if either side is unbounded.
    pub fn has_infinite(&self) -> bool {
        self.left.is_none() || self.right.is_none()
    }

    // TODO See TODO below about range validity.  Right now we don't care
    // much.  If we start to care, move the caring to `new` and `extend`
    // methods.  That will allow this crate to protect the integrity of
    // MetricSummary and I64Range in the face of the extension needing to be
    // able to construct them from raw (and therefore potentially
    // corrupt) inputs.
    fn is_valid(&self) -> bool {
        match (self.left, self.right) {
            (Some(a), Some(b)) => a <= b,
            _ => true,
        }
    }

    /// True for the empty range `[a, a)`.
    pub fn is_singleton(&self) -> bool {
        match (self.left, self.right) {
            (Some(a), Some(b)) => a == b,
            _ => false,
        }
    }

    pub fn extend(&mut self, other: &Self) {
        // TODO: What should extend do with invalid ranges on either side? right now it treats them as if they are real...
self.left = match (self.left, other.left) {
            (None, _) => None,
            (_, None) => None,
            (Some(a), Some(b)) => Some(min(a, b)),
        };
        self.right = match (self.right, other.right) {
            (None, _) => None,
            (_, None) => None,
            (Some(a), Some(b)) => Some(max(a, b)),
        };
    }

    /// Half-open membership test: `left <= pt < right`, with `None` meaning
    /// unbounded on that side.
    pub fn contains(&self, pt: i64) -> bool {
        match (self.left, self.right) {
            (Some(l), Some(r)) => pt >= l && pt < r,
            (Some(l), None) => pt >= l,
            (None, Some(r)) => pt < r,
            (None, None) => true,
        }
    }

    /// Width of the range; `None` when unbounded or invalid.
    pub fn duration(&self) -> Option<i64> {
        if self.has_infinite() || !self.is_valid() {
            return None;
        }
        Some(self.right.unwrap() - self.left.unwrap())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extend() {
        let mut a = I64Range {
            left: Some(4),
            right: Some(5),
        };
        let b = I64Range {
            left: Some(3),
            right: Some(6),
        };
        a.extend(&b);
        // b completely covers a
        assert_eq!(a, b);
        // extend to left
        let c = I64Range {
            left: Some(2),
            right: Some(5),
        };
        a.extend(&c);
        assert_eq!(
            a,
            I64Range {
                left: Some(2),
                right: Some(6)
            }
        );
        // extend to right
        let d = I64Range {
            left: Some(6),
            right: Some(9),
        };
        a.extend(&d);
        assert_eq!(
            a,
            I64Range {
                left: Some(2),
                right: Some(9)
            }
        );
        // infinites
        let e = I64Range {
            left: Some(10),
            right: None,
        };
        a.extend(&e);
        assert_eq!(
            a,
            I64Range {
                left: Some(2),
                right: None
            }
        );
        let f = I64Range {
            left: None,
            right: Some(5),
        };
        a.extend(&f);
        assert_eq!(
            a,
            I64Range {
                left: None,
                right: None
            }
        );
        // if a range contains another, it's unaffected
        a.extend(&c);
        assert_eq!(
            a,
            I64Range {
                left: None,
                right: None
            }
        );
        // whether infinite or not
        let mut a = I64Range {
            left: Some(2),
            right: Some(9),
        };
        a.extend(&b);
        assert_eq!(
            a,
            I64Range {
                left: Some(2),
                right: Some(9)
            }
        );
        // right now invalid ranges are extended as normal though they can only ever extend in a single direction
        let weird = I64Range {
            left: Some(-2),
            right: Some(-9),
        };
        a.extend(&weird);
        assert_eq!(
            a,
            I64Range {
                left: Some(-2),
                right: Some(9)
            }
        );
        let weird = I64Range {
            left: Some(20),
            right: Some(10),
        };
        a.extend(&weird);
        assert_eq!(
            a,
            I64Range {
                left: Some(-2),
                right: Some(10)
            }
        );
        //same if we extend a weird one, we can make a valid, or invalid one...
        let mut weird = I64Range {
            left: Some(-2),
            right: Some(-9),
        };
        let weird2 = I64Range {
            left: Some(-6),
            right: Some(-10),
        };
        weird.extend(&weird2);
        assert_eq!(
            weird,
            I64Range {
                left: Some(-6),
                right: Some(-9)
            }
        );
        // it is also possible to get a valid range from two weirds
        let weird3 = I64Range {
            left: Some(6),
            right: Some(3),
        };
        weird.extend(&weird3);
        assert_eq!(
            weird,
            I64Range {
                left: Some(-6),
                right: Some(3)
            }
        );
        assert!(weird.is_valid());
        // extending with a valid should always produce a valid and will work as usual
        let mut weird = I64Range {
            left: Some(-6),
            right: Some(-9),
        };
        let normal = I64Range {
            left: Some(2),
            right: Some(9),
        };
        weird.extend(&normal);
        assert_eq!(
            weird,
            I64Range {
                left: Some(-6),
                right: Some(9)
            }
        );
    }

    #[test]
    fn test_contains() {
        let a = I64Range {
            left: Some(2),
            right: Some(5),
        };
        assert!(a.contains(2));
        assert!(a.contains(4));
        assert!(!a.contains(5));
        assert!(!a.contains(6));
        let a = I64Range {
            left: None,
            right: Some(-5),
        };
        assert!(a.contains(-100));
        assert!(!a.contains(0));
        assert!(!a.contains(6));
        let a = I64Range {
            left: Some(-10),
            right: None,
        };
        assert!(a.contains(-10));
        assert!(a.contains(0));
        assert!(a.contains(1000));
        assert!(!a.contains(-20));
        //invalid ranges contain no points
        let a = I64Range {
            left: Some(0),
            right: Some(-5),
        };
        assert!(!a.contains(-4));
        assert!(!a.contains(1));
        assert!(!a.contains(-6));
    }

    #[test]
    fn test_duration() {
        let a = I64Range {
            left: Some(3),
            right: Some(7),
        };
        assert_eq!(a.duration().unwrap(), 4);
        let a = I64Range {
            left: Some(-3),
            right: Some(7),
        };
        assert_eq!(a.duration().unwrap(), 10);
        let a = I64Range {
            left: None,
            right: Some(7),
        };
        assert_eq!(a.duration(), None);
        let a = I64Range {
            left: Some(3),
            right: None,
        };
        assert_eq!(a.duration(), None);
        //invalid ranges return None durations as well
        let a = I64Range {
            left: Some(3),
            right: Some(0),
        };
        assert_eq!(a.duration(), None);
    }

    #[test]
    fn test_checks() {
        let a = I64Range {
            left: Some(2),
            right: Some(5),
        };
        assert!(a.is_valid());
        assert!(!a.is_singleton());
        let a = I64Range {
            left: None,
            right: Some(-5),
        };
        assert!(a.is_valid());
        assert!(!a.is_singleton());
        let a = I64Range {
            left: Some(-10),
            right: None,
        };
        assert!(a.is_valid());
        assert!(!a.is_singleton());
        let a = I64Range {
            left: Some(2),
            right: Some(2),
        };
        assert!(a.is_valid());
        assert!(a.is_singleton());
        assert_eq!(a.duration().unwrap(), 0);
        let a = I64Range {
            left: Some(0),
            right: Some(-10),
        };
        assert!(!a.is_valid());
        assert!(!a.is_singleton());
    }
}

================================================ FILE: crates/counter-agg/src/tests.rs ================================================

// TODO Move to ../tests/lib.rs
use crate::range::I64Range;
use crate::*;

use approx::assert_relative_eq;

fn to_micro(t: f64) -> f64 {
    t * 1_000_000.0
}

//do proper numerical comparisons on the values where that matters, use exact where it should be exact.
#[track_caller]
pub fn assert_close_enough(p1: &MetricSummary, p2: &MetricSummary) {
    // exact comparisons for discrete fields, relative comparisons for the
    // floating-point regression sums
    assert_eq!(p1.first, p2.first, "first");
    assert_eq!(p1.second, p2.second, "second");
    assert_eq!(p1.penultimate, p2.penultimate, "penultimate");
    assert_eq!(p1.last, p2.last, "last");
    assert_eq!(p1.num_changes, p2.num_changes, "num_changes");
    assert_eq!(p1.num_resets, p2.num_resets, "num_resets");
    assert_eq!(p1.stats.n, p2.stats.n, "n");
    assert_relative_eq!(p1.stats.sx, p2.stats.sx);
    assert_relative_eq!(p1.stats.sx2, p2.stats.sx2);
    assert_relative_eq!(p1.stats.sy, p2.stats.sy);
    assert_relative_eq!(p1.stats.sy2, p2.stats.sy2);
    assert_relative_eq!(p1.stats.sxy, p2.stats.sxy);
}

#[test]
fn create() {
    // a one-point summary: all sentinel points equal the single point
    let testpt = TSPoint { ts: 0, val: 0.0 };
    let test = CounterSummaryBuilder::new(&testpt, None).build();
    assert_eq!(test.first, testpt);
    assert_eq!(test.second, testpt);
    assert_eq!(test.penultimate, testpt);
    assert_eq!(test.last, testpt);
    assert_eq!(test.reset_sum, 0.0);
}

#[test]
fn adding_point() {
    let mut test = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 0.0 }, None);
    let testpt = TSPoint { ts: 5, val: 10.0 };
    test.add_point(&testpt).unwrap();
    let test = test.build();
    assert_eq!(test.first, TSPoint { ts: 0, val: 0.0 });
    assert_eq!(test.second, testpt);
    assert_eq!(test.penultimate, TSPoint { ts: 0, val: 0.0 });
    assert_eq!(test.last, testpt);
    assert_eq!(test.reset_sum, 0.0);
    assert_eq!(test.num_resets, 0);
    assert_eq!(test.num_changes, 1);
}

#[test]
fn adding_points_to_counter() {
    let startpt = TSPoint { ts: 0, val: 0.0 };
    let mut summary = CounterSummaryBuilder::new(&startpt, None);
    summary.add_point(&TSPoint { ts: 5, val: 10.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 15, val: 20.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 20, val: 50.0 }).unwrap();
    // drop from 50 to 10 is a counter reset
    summary.add_point(&TSPoint { ts: 25, val: 10.0 }).unwrap();
    let summary = summary.build();
    assert_eq!(summary.first, startpt);
    assert_eq!(summary.second, TSPoint { ts: 5, val: 10.0 });
    assert_eq!(summary.penultimate, TSPoint { ts: 20, val: 50.0 });
    assert_eq!(summary.last, TSPoint { ts: 25, val: 10.0 });
    assert_relative_eq!(summary.reset_sum, 50.0);
    assert_eq!(summary.num_resets, 1);
    assert_eq!(summary.num_changes, 4);
    assert_eq!(summary.stats.count(), 6);
    assert_relative_eq!(summary.stats.sum().unwrap().x, 0.000075);
    // non obvious one here, sumy should be the sum of all values including the resets at the time.
    assert_relative_eq!(
        summary.stats.sum().unwrap().y,
        0.0 + 10.0 + 20.0 + 20.0 + 50.0 + 60.0
    );
}

#[test]
fn adding_out_of_order_counter() {
    let startpt = TSPoint { ts: 0, val: 0.0 };
    let mut summary = CounterSummaryBuilder::new(&startpt, None);
    summary.add_point(&TSPoint { ts: 5, val: 10.0 }).unwrap();
    assert_eq!(
        CounterError::OrderError,
        summary.add_point(&TSPoint { ts: 2, val: 9.0 }).unwrap_err()
    );
}

#[test]
fn test_counter_delta() {
    let startpt = &TSPoint { ts: 0, val: 10.0 };
    let mut summary = CounterSummaryBuilder::new(startpt, None);

    // with one point
    assert_relative_eq!(summary.clone().build().delta(), 0.0);

    // simple case
    summary.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();
    assert_relative_eq!(summary.clone().build().delta(), 10.0);

    //now with a reset
    summary.add_point(&TSPoint { ts: 20, val: 10.0 }).unwrap();
    assert_relative_eq!(summary.clone().build().delta(), 20.0);
}

#[test]
fn test_combine() {
    let mut summary = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 0.0 }, None);
    summary.add_point(&TSPoint { ts: 5, val: 10.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 15, val: 30.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 20, val: 50.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 25, val: 10.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 30, val: 40.0 }).unwrap();

    // split the same stream across two builders; combining must match the
    // single-builder result
    let mut part1 = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 0.0 }, None);
    part1.add_point(&TSPoint { ts: 5, val: 10.0 }).unwrap();
    part1.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();

    let mut part2 = CounterSummaryBuilder::new(&TSPoint { ts: 15, val: 30.0 }, None);
    part2.add_point(&TSPoint { ts: 20, val: 50.0 }).unwrap();
    part2.add_point(&TSPoint { ts: 25, val: 10.0 }).unwrap();
    part2.add_point(&TSPoint { ts: 30, val: 40.0 }).unwrap();

    let mut combined = part1.clone();
    combined.combine(&part2.clone().build()).unwrap();
    assert_close_enough(&summary.build(), &combined.build());

    // test error in wrong direction
    assert_eq!(
        part2.combine(&part1.build()).unwrap_err(),
        CounterError::OrderError
    );
}

#[test]
fn test_combine_with_small_summary() {
    let mut summary = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 50.0 }, None);
    summary.add_point(&TSPoint { ts: 25, val: 10.0 }).unwrap(); // also tests that a reset at the boundary works correctly

    let part1 = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 50.0 }, None);
    let part2 = CounterSummaryBuilder::new(&TSPoint { ts: 25, val: 10.0 }, None);

    let mut combined = part1.clone();
    combined.combine(&part2.clone().build()).unwrap();
    assert_close_enough(&summary.build(), &combined.build());

    // test error in wrong direction
    combined = part2;
    assert_eq!(
        combined.combine(&part1.build()).unwrap_err(),
        CounterError::OrderError
    );
}

#[test]
fn test_multiple_resets() {
    let startpt = TSPoint { ts: 0, val: 0.0 };
    let mut summary = CounterSummaryBuilder::new(&startpt, None);
    summary.add_point(&TSPoint { ts: 5, val: 10.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 15, val: 10.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 20, val: 40.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 25, val: 20.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 30, val: 40.0 }).unwrap();
    let summary = summary.build();
    assert_eq!(summary.first, startpt);
    assert_eq!(summary.second, TSPoint { ts: 5, val: 10.0 });
    assert_eq!(summary.penultimate, TSPoint { ts: 25, val: 20.0 });
    assert_eq!(summary.last, TSPoint { ts: 30, val: 40.0 });
assert_relative_eq!(summary.reset_sum, 60.0);
assert_eq!(summary.num_resets, 2);
assert_eq!(summary.num_changes, 6);
assert_eq!(summary.stats.count(), 7);
assert_relative_eq!(summary.stats.sum().unwrap().x, 0.000105);
// non obvious one here, sy should be the sum of all values including the resets at the time they were added.
assert_relative_eq!(
    summary.stats.sum().unwrap().y,
    0.0 + 10.0 + 20.0 + 30.0 + 60.0 + 80.0 + 100.0
);
// Rebuild the same series as two halves and check that combine() produces
// the same summary as adding every point to a single builder (the two
// resets fall one in each half plus one at the part1/part2 boundary).
let mut part1 = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 0.0 }, None);
part1.add_point(&TSPoint { ts: 5, val: 10.0 }).unwrap();
part1.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();
let mut part2 = CounterSummaryBuilder::new(&TSPoint { ts: 15, val: 10.0 }, None);
part2.add_point(&TSPoint { ts: 20, val: 40.0 }).unwrap();
part2.add_point(&TSPoint { ts: 25, val: 20.0 }).unwrap();
part2.add_point(&TSPoint { ts: 30, val: 40.0 }).unwrap();
let mut combined = part1.clone();
combined.combine(&part2.clone().build()).unwrap();
assert_close_enough(&summary, &combined.build());
// test error in wrong direction
assert_eq!(
    part2.combine(&part1.build()).unwrap_err(),
    CounterError::OrderError
);
}

// A summary over a single point: the asserted contract is that delta and
// both instantaneous deltas are zero, and every rate accessor yields None
// (there is no second point to compute a rate against).
#[test]
fn test_extraction_single_point() {
    let startpt = TSPoint { ts: 20, val: 10.0 };
    let summary = CounterSummaryBuilder::new(&startpt, None).build();
    assert_relative_eq!(summary.delta(), 0.0);
    assert_eq!(summary.rate(), None);
    assert_relative_eq!(summary.idelta_left(), 0.0);
    assert_relative_eq!(summary.idelta_right(), 0.0);
    assert_eq!(summary.irate_left(), None);
    assert_eq!(summary.irate_right(), None);
    assert_eq!(summary.num_changes, 0);
    assert_eq!(summary.num_resets, 0);
}

// Monotonically increasing series with no resets: delta/rate/idelta/irate
// extraction over a simple four-point counter.
#[test]
fn test_extraction_simple() {
    let mut summary = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 0.0 }, None);
    summary.add_point(&TSPoint { ts: 5, val: 5.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 10, val: 20.0 }).unwrap();
    summary.add_point(&TSPoint { ts: 15, val: 30.0 }).unwrap();
    let summary = summary.build();
    assert_relative_eq!(summary.delta(), 30.0);
assert_relative_eq!(summary.rate().unwrap(), to_micro(2.0)); assert_relative_eq!(summary.idelta_left(), 5.0); assert_relative_eq!(summary.idelta_right(), 10.0); assert_relative_eq!(summary.irate_left().unwrap(), to_micro(1.0)); assert_relative_eq!(summary.irate_right().unwrap(), to_micro(2.0)); assert_eq!(summary.num_changes, 3); assert_eq!(summary.num_resets, 0); } #[test] fn test_extraction_with_resets() { let mut summary = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 10.0 }, None); summary.add_point(&TSPoint { ts: 5, val: 5.0 }).unwrap(); summary.add_point(&TSPoint { ts: 10, val: 30.0 }).unwrap(); summary.add_point(&TSPoint { ts: 15, val: 15.0 }).unwrap(); let summary = summary.build(); assert_relative_eq!(summary.delta(), 45.0); assert_relative_eq!(summary.rate().unwrap(), to_micro(3.0)); assert_relative_eq!(summary.idelta_left(), 5.0); assert_relative_eq!(summary.idelta_right(), 15.0); assert_relative_eq!(summary.irate_left().unwrap(), to_micro(1.0)); assert_relative_eq!(summary.irate_right().unwrap(), to_micro(3.0)); assert_eq!(summary.num_changes, 3); assert_eq!(summary.num_resets, 2); } #[test] fn test_bounds() { let summary = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 10.0 }, None); assert!(summary.bounds_valid()); // no bound is fine. 
let summary = CounterSummaryBuilder::new( &TSPoint { ts: 0, val: 10.0 }, Some(I64Range { left: Some(5), right: Some(10), }), ); assert!(!summary.bounds_valid()); // wrong bound not // left bound inclusive let mut summary = CounterSummaryBuilder::new( &TSPoint { ts: 0, val: 10.0 }, Some(I64Range { left: Some(0), right: Some(10), }), ); assert!(summary.bounds_valid()); summary.add_point(&TSPoint { ts: 5, val: 5.0 }).unwrap(); assert!(summary.bounds_valid()); // adding points past our bounds is okay, but the bounds will be invalid when we check, this will happen in the final function not on every point addition for efficiency // note the right bound is exclusive summary.add_point(&TSPoint { ts: 10, val: 10.0 }).unwrap(); assert!(!summary.bounds_valid()); // slightly weird case here... two invalid bounds can produce a validly bounded object once the bounds are combined, this is a bit weird, but seems like it's the correct behavior let summary2 = CounterSummaryBuilder::new( &TSPoint { ts: 15, val: 10.0 }, Some(I64Range { left: Some(20), right: Some(30), }), ); summary.combine(&summary2.build()).unwrap(); assert!(summary.bounds_valid()); assert_eq!( summary.clone().build().bounds.unwrap(), I64Range { left: Some(0), right: Some(30) } ); // two of the same valid bounds remain the same and valid let summary2 = CounterSummaryBuilder::new( &TSPoint { ts: 20, val: 10.0 }, Some(I64Range { left: Some(0), right: Some(30), }), ); summary.combine(&summary2.build()).unwrap(); assert!(summary.bounds_valid()); assert_eq!( summary.clone().build().bounds.unwrap(), I64Range { left: Some(0), right: Some(30) } ); // combining with unbounded ones is fine, but the bounds survive let summary2 = CounterSummaryBuilder::new(&TSPoint { ts: 25, val: 10.0 }, None); summary.combine(&summary2.build()).unwrap(); assert!(summary.bounds_valid()); assert_eq!( summary.clone().build().bounds.unwrap(), I64Range { left: Some(0), right: Some(30) } ); // and combining bounds that do not span are still invalid 
let summary2 = CounterSummaryBuilder::new( &TSPoint { ts: 35, val: 10.0 }, Some(I64Range { left: Some(0), right: Some(32), }), ); summary.combine(&summary2.build()).unwrap(); assert!(!summary.bounds_valid()); assert_eq!( summary.build().bounds.unwrap(), I64Range { left: Some(0), right: Some(32) } ); // combining unbounded with bounded ones is fine, but the bounds survive let mut summary = CounterSummaryBuilder::new(&TSPoint { ts: 0, val: 10.0 }, None); let summary2 = CounterSummaryBuilder::new( &TSPoint { ts: 25, val: 10.0 }, Some(I64Range { left: Some(0), right: Some(30), }), ); summary.combine(&summary2.build()).unwrap(); assert!(summary.bounds_valid()); assert_eq!( summary.build().bounds.unwrap(), I64Range { left: Some(0), right: Some(30) } ); } #[test] fn test_prometheus_extrapolation_simple() { //error on lack of bounds provided let summary = CounterSummaryBuilder::new( &TSPoint { ts: 5000, val: 15.0, }, None, ); let summary = summary.build(); assert_eq!( summary.prometheus_delta().unwrap_err(), CounterError::BoundsInvalid ); assert_eq!( summary.prometheus_rate().unwrap_err(), CounterError::BoundsInvalid ); //error on infinite bounds let summary = CounterSummaryBuilder::new( &TSPoint { ts: 5000, val: 15.0, }, Some(I64Range { left: None, right: Some(21000), }), ) .build(); assert_eq!( summary.prometheus_delta().unwrap_err(), CounterError::BoundsInvalid ); assert_eq!( summary.prometheus_rate().unwrap_err(), CounterError::BoundsInvalid ); //ranges less than 1ms are treated as zero by Prom let mut summary = CounterSummaryBuilder::new( &TSPoint { ts: 300, val: 15.0 }, Some(I64Range { left: Some(0), right: Some(900), }), ); summary.add_point(&TSPoint { ts: 600, val: 20.0 }).unwrap(); assert_eq!(summary.build().prometheus_rate().unwrap(), None); //ranges should go out an extra 1000 so that we account for the extra duration that prom subtracts (1 ms) let mut summary = CounterSummaryBuilder::new( &TSPoint { ts: 5000, val: 15.0, }, Some(I64Range { left: Some(0), right: 
Some(21000),
    }),
);
// singletons should return none
assert_eq!(summary.clone().build().prometheus_delta().unwrap(), None);
assert_eq!(summary.clone().build().prometheus_rate().unwrap(), None);
// Previously this test added the ts=10000 point and then immediately
// shadowed `summary` with an identical fresh builder (discarding the point,
// flagged by a TODO) and re-ran the same singleton asserts. A single
// builder accumulating both points exercises the same behavior without the
// dead write or the duplicated assertions.
summary
    .add_point(&TSPoint {
        ts: 10000,
        val: 20.0,
    })
    .unwrap();
summary
    .add_point(&TSPoint {
        ts: 15000,
        val: 25.0,
    })
    .unwrap();
let summary = summary.build();
assert_relative_eq!(summary.delta(), 10.0);
assert_relative_eq!(summary.rate().unwrap(), to_micro(0.001));
assert_relative_eq!(summary.prometheus_delta().unwrap().unwrap(), 20.0);
// linear cases like this should be equal
assert_relative_eq!(
    summary.prometheus_rate().unwrap().unwrap(),
    summary.rate().unwrap()
);
// add a point outside our bounds and make sure we error correctly
let mut summary = CounterSummaryBuilder::from(summary);
summary
    .add_point(&TSPoint {
        ts: 25000,
        val: 35.0,
    })
    .unwrap();
let summary = summary.build();
assert_eq!(
    summary.prometheus_delta().unwrap_err(),
    CounterError::BoundsInvalid
);
assert_eq!(
    summary.prometheus_rate().unwrap_err(),
    CounterError::BoundsInvalid
);
}

#[test]
fn test_prometheus_extrapolation_bound_size() {
    let mut summary = CounterSummaryBuilder::new(
        &TSPoint {
            ts: 20000,
            val: 40.0,
        },
        Some(I64Range {
            left: Some(10000),
            right: Some(51000),
        }),
    );
    summary
        .add_point(&TSPoint {
            ts: 30000,
            val: 20.0,
        })
        .unwrap();
    summary
        .add_point(&TSPoint {
            ts: 40000,
            val: 40.0,
        })
        .unwrap();
    let summary = summary.build();
assert_relative_eq!(summary.delta(), 40.0); assert_relative_eq!(summary.rate().unwrap(), to_micro(0.002)); //we go all the way to the edge of the bounds here because it's within 1.1 average steps (when you subtract the extra 1000 for ms it goes to 50000) assert_relative_eq!(summary.prometheus_delta().unwrap().unwrap(), 80.0); // linear cases like this should be equal assert_relative_eq!( summary.prometheus_rate().unwrap().unwrap(), summary.rate().unwrap() ); // now lets push the bounds to be a bit bigger let mut summary = CounterSummaryBuilder::from(summary); summary.set_bounds(Some(I64Range { left: Some(8000), right: Some(53000), })); // now because we're further than 1.1 out on each side, we end projecting out to half the avg distance on each side assert_relative_eq!( summary.clone().build().prometheus_delta().unwrap().unwrap(), 60.0 ); // but the rate is still divided by the full bound duration assert_relative_eq!( summary.build().prometheus_rate().unwrap().unwrap(), to_micro(60.0 / 44000.0) ); //this should all be the same as the last one in the first part. 
// The change occurs because we hit the zero boundary condition // so things change on the second bit because of where resets occur and our starting value let mut summary = CounterSummaryBuilder::new( &TSPoint { ts: 20000, val: 20.0, }, Some(I64Range { left: Some(10000), right: Some(51000), }), ); summary .add_point(&TSPoint { ts: 30000, val: 40.0, }) .unwrap(); summary .add_point(&TSPoint { ts: 40000, val: 20.0, }) .unwrap(); let summary = summary.build(); assert_relative_eq!(summary.delta(), 40.0); assert_relative_eq!(summary.rate().unwrap(), to_micro(0.002)); //we go all the way to the edge of the bounds here because it's within 1.1 average steps assert_relative_eq!(summary.prometheus_delta().unwrap().unwrap(), 80.0); // linear cases like this should be equal assert_relative_eq!( summary.prometheus_rate().unwrap().unwrap(), summary.rate().unwrap() ); // now lets push the bounds to be a bit bigger let mut summary = CounterSummaryBuilder::from(summary); summary.set_bounds(Some(I64Range { left: Some(8000), right: Some(53000), })); let summary = summary.build(); // now because we're further than 1.1 out on the right side, // we end projecting out to half the avg distance on that side, // but because we hit the inferred zero point on the left (0 in this case) // we use zero as the bound on the left side assert_relative_eq!(summary.prometheus_delta().unwrap().unwrap(), 70.0); // but the rate is still divided by the full bound duration assert_relative_eq!( summary.prometheus_rate().unwrap().unwrap(), to_micro(70.0 / 44000.0) ); } ================================================ FILE: crates/encodings/Cargo.toml ================================================ [package] name = "encodings" version = "0.1.0" edition = "2021" [dependencies] [dev-dependencies] quickcheck = "1" quickcheck_macros = "1" ================================================ FILE: crates/encodings/src/lib.rs ================================================ pub mod delta { use crate::zigzag; pub fn 
i64_decoder() -> impl FnMut(i64) -> i64 {
    // Running prefix-sum: each emitted value is the previous value plus the
    // incoming delta (wrapping, to mirror the encoder exactly).
    let mut prev = 0i64;
    move |delta| {
        let value = prev.wrapping_add(delta);
        prev = value;
        value
    }
}

/// Decodes zigzag-encoded u64 deltas back into the original running values.
pub fn u64_decoder() -> impl FnMut(u64) -> u64 {
    let mut prev = 0u64;
    move |delta| {
        let delta = zigzag::decode(delta) as u64;
        let value = prev.wrapping_add(delta);
        prev = value;
        value
    }
}

/// Encodes i64 values as (wrapping) deltas from the previous value.
pub fn i64_encoder() -> impl FnMut(i64) -> i64 {
    let mut prev = 0i64;
    move |value: i64| {
        let delta = value.wrapping_sub(prev);
        prev = value;
        delta
    }
}

/// Encodes u64 values as zigzag-encoded (wrapping) deltas from the previous
/// value, so small back-and-forth movements stay small when varint-packed.
pub fn u64_encoder() -> impl FnMut(u64) -> u64 {
    let mut prev = 0u64;
    move |value: u64| {
        let delta = value.wrapping_sub(prev);
        prev = value;
        zigzag::encode(delta as i64)
    }
}

#[cfg(test)]
mod test {
    use quickcheck_macros::quickcheck;

    use super::*;

    // NOTE(review): the element types of `values`/`output` were lost in
    // extraction; restored as u64/i64 from the function names and the
    // encoder/decoder signatures they are mapped through.
    #[quickcheck]
    fn quick_test_roundtrip_u64(values: Vec<u64>) -> bool {
        let mut bytes = vec![];
        crate::prefix_varint::compress_u64s_to_vec(
            &mut bytes,
            values.iter().cloned().map(u64_encoder()),
        );
        let output: Vec<u64> = crate::prefix_varint::u64_decompressor(&bytes)
            .map(u64_decoder())
            .collect();
        assert_eq!(values, output);
        true
    }

    #[quickcheck]
    fn quick_test_roundtrip_i64(values: Vec<i64>) -> bool {
        let mut bytes = vec![];
        crate::prefix_varint::compress_i64s_to_vec(
            &mut bytes,
            values.iter().cloned().map(i64_encoder()),
        );
        let output: Vec<i64> = crate::prefix_varint::i64_decompressor(&bytes)
            .map(i64_decoder())
            .collect();
        assert_eq!(values, output);
        true
    }
}
}

pub mod zigzag {
    /// Maps i64 -> u64 so small magnitudes map to small unsigned values:
    /// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
    #[inline(always)]
    pub fn encode(n: i64) -> u64 {
        if n < 0 {
            // let's avoid the edge case of i64::min_value()
            // !n is equal to `-n - 1`, so this is:
            // !n * 2 + 1 = 2(-n - 1) + 1 = -2n - 2 + 1 = -2n - 1
            !(n as u64) * 2 + 1
        } else {
            (n as u64) * 2
        }
    }

    /// Inverse of `encode`: even values are non-negative, odd are negative.
    #[inline(always)]
    pub fn decode(n: u64) -> i64 {
        if n % 2 == 0 {
            // positive number
            (n / 2) as i64
        } else {
            // negative number
            // !m * 2 + 1 = n
            // !m * 2 = n - 1
            // !m = (n - 1) / 2
            // m = !((n - 1) / 2)
            // since we have n is odd, we have floor(n / 2) = floor((n - 1) / 2)
            !(n / 2) as i64
        }
    }
}

pub mod prefix_varint { //!
Similar to [LEB128](https://en.wikipedia.org/wiki/LEB128), but it moves //! all the tag bits to the LSBs of the first byte, which ends up looking //! like this (`x` is a value bit, the rest are tag bits): //! ```python,ignore,no_run //! xxxxxxx1 7 bits in 1 byte //! xxxxxx10 14 bits in 2 bytes //! xxxxx100 21 bits in 3 bytes //! xxxx1000 28 bits in 4 bytes //! xxx10000 35 bits in 5 bytes //! xx100000 42 bits in 6 bytes //! x1000000 49 bits in 7 bytes //! 10000000 56 bits in 8 bytes //! 00000000 64 bits in 9 bytes //! ``` //! based on https://github.com/stoklund/varint pub fn size_vec>(bytes: &mut Vec, values: I) { let size: usize = values.map(|v| bytes_for_value(v) as usize).sum(); bytes.reserve(size + 9); } #[inline] pub fn bytes_for_value(value: u64) -> u32 { let bits = value.leading_zeros(); let mut bytes = 1 + bits.wrapping_sub(1) / 7; if bits > 56 { bytes = 9 } bytes } pub struct I64Compressor i64> { compressor: U64Compressor u64>, encoder: F, } impl I64Compressor i64> { pub fn new() -> Self { Self { compressor: U64Compressor::new(), encoder: |i| i, } } } impl Default for I64Compressor i64> { fn default() -> Self { Self::new() } } impl i64> I64Compressor { pub fn with(encoder: F) -> Self { Self { compressor: U64Compressor::new(), encoder, } } pub fn push(&mut self, value: i64) { let encoded = crate::zigzag::encode((self.encoder)(value)); self.compressor.push(encoded) } pub fn finish(self) -> Vec { self.compressor.finish() } } pub struct U64Compressor u64> { bytes: Vec, encoder: F, } impl U64Compressor u64> { pub fn new() -> Self { Self { bytes: vec![], encoder: |i| i, } } } impl Default for U64Compressor u64> { fn default() -> Self { Self::new() } } impl u64> U64Compressor { pub fn with(encoder: F) -> Self { Self { bytes: vec![], encoder, } } pub fn push(&mut self, value: u64) { let encoded = (self.encoder)(value); write_to_vec(&mut self.bytes, encoded); } pub fn finish(self) -> Vec { self.bytes } pub fn is_empty(&self) -> bool { self.bytes.is_empty() } } pub 
fn compress_i64s_to_vec>(bytes: &mut Vec, values: I) { compress_u64s_to_vec(bytes, values.map(crate::zigzag::encode)) } pub fn compress_u64s_to_vec>(bytes: &mut Vec, values: I) { values.for_each(|v| write_to_vec(bytes, v)); } // based on https://github.com/stoklund/varint, (Apache licensed) // see also https://github.com/WebAssembly/design/issues/601 #[inline] pub fn write_to_vec(out: &mut Vec, mut value: u64) { if value == 0 { out.push(0x1); return; } let bits = 64 - value.leading_zeros(); let mut bytes = 1 + bits.wrapping_sub(1) / 7; if bits > 56 { out.push(0); bytes = 8 } else if value != 0 { value = (2 * value + 1) << (bytes - 1) } let value = value.to_le_bytes(); for value in value.iter().take(bytes as usize) { out.push(*value); } } type Value = u64; pub fn i64_decompressor(bytes: &[u8]) -> impl Iterator + '_ { u64_decompressor(bytes).map(crate::zigzag::decode) } pub fn u64_decompressor(mut bytes: &[u8]) -> impl Iterator + '_ { std::iter::from_fn(move || { if bytes.is_empty() { None } else { let (value, len) = read_from_slice(bytes); bytes = &bytes[len..]; Some(value) } }) } #[inline] pub fn read_from_slice(bytes: &[u8]) -> (Value, usize) { let value: [u8; 8] = if bytes.len() >= 8 { bytes[0..8].try_into().unwrap() } else { let mut value = [0; 8]; value[..bytes.len()].copy_from_slice(bytes); value }; let tag_byte = value[0]; if tag_byte & 1 == 1 { let value = (tag_byte >> 1) as u64; return (value, 1); } let length = prefix_length(tag_byte) as usize; let value = if length < 9 { let unused = 64 - 8 * length; let value = u64::from_le_bytes(value); (value << unused) >> (unused + length) } else { u64::from_le_bytes(bytes[1..9].try_into().unwrap()) }; (value, length) } #[inline(always)] pub fn prefix_length(tag_byte: u8) -> u32 { 1 + ((tag_byte as u32) | 0x100).trailing_zeros() } #[cfg(test)] mod test { use quickcheck_macros::quickcheck; use super::*; #[quickcheck] fn quick_test_roundtrip_u64(values: Vec) -> bool { let mut bytes = vec![]; compress_u64s_to_vec(&mut 
bytes, values.iter().cloned()); let output: Vec = u64_decompressor(&bytes).collect(); assert_eq!(values, output); true } #[quickcheck] fn quick_test_roundtrip_i64(values: Vec) -> bool { let mut bytes = vec![]; compress_i64s_to_vec(&mut bytes, values.iter().cloned()); let output: Vec = i64_decompressor(&bytes).collect(); assert_eq!(values, output); true } } } ================================================ FILE: crates/flat_serialize/Readme.md ================================================ # Flat Serialize # A cannonicalization of write-to-pointer style serialization. You write a definition describing the layout the data should have when serialized, and the macro will generate code that reads and writes each field in order. It also supports variable-length fields where the length is stored in an earlier field. ## Examples ## ### Basic ### ```rust /// This will define a struct like /// ``` /// struct Basic<'a> { /// header: u32, /// data_len: usize, /// array: [u16; 3], /// data: &'a [u8], /// data2: &'a [u8], /// } /// ``` /// along with various functions to read and write this data to byte buffers /// (see below) flat_serialize!{ struct Basic<'a> { header: u32, data_len: usize, array: [u16; 3], data: [u8; self.data_len], data2: [u8; self.data_len / 2], } } #[test] fn basic() { let basic = Basic{ header: 33, array: [202, 404, 555], data: &[1, 3, 5, 7, 9, 11], data2: &[4, 4, 4], }; // The generated struct can be used to serialize data to a byte vector let &mut serialized = Vec::with_capacity(basic.len()); basic.fill_vec(&mut serialized) // or deserialize data from a vector so written let (deserialized, remaining_bytes) = unsafe { Basic::try_ref(&bytes).unwrap() }; assert_eq!(deserialized.header, &33); assert_eq!(deserialized.array, &[202, 404, 555]); assert_eq!(deserialized.data, &[1, 3, 5, 7, 9, 11][..]); assert_eq!(deserialized.data2, &[4, 4, 4][..]); assert_eq!(remaining_bytes, &[][..]); // For serialization, the generated code will simply write each field, one 
// after another. (It is currently the programmer's responsibility to ensure // that the fields will be aligned correctly for deserialization) let mut expected = Vec::new(); expected.extend_from_slice(&33u32.to_ne_bytes()); expected.extend_from_slice(&6usize.to_ne_bytes()); expected.extend_from_slice(&202u16.to_ne_bytes()); expected.extend_from_slice(&404u16.to_ne_bytes()); expected.extend_from_slice(&555u16.to_ne_bytes()); expected.extend_from_slice(&[1, 3, 5, 7, 9, 11]); expected.extend_from_slice(&[4, 4, 4]); assert_eq!(serialized, expected); } ``` ### Advanced ### ```rust /// flat-serializable values can be nested, a field marked with /// `flat_serialize::flatten` will be read and written using `FlattenableRef`. // The data layout is equivalent to just inlining all the fields. /// ``` /// struct Nested<'a> { /// prefix: u64, /// basic: Basic<'a>, /// } /// ``` flat_serialize!{ struct Nested<'a> { prefix: u64, #[flat_serialize::flatten] basic: Basic<'a>, } } /// Enum-like values are also supported. The enum tag is stored immediately /// before the enum fields.
flat_serialize!{ enum Enum<'a> { k: u64, First: 2 { data_len: u32, data: [u8; self.data_len], }, Fixed: 3 { array: [u16; 3], }, } } fn enum_example(e: Enum) { match e { Enum::First{ data_len, data } => todo!(), Enum::Fixed{ array } => todo!(), } } ``` ================================================ FILE: crates/flat_serialize/example_generated.rs ================================================ #![allow(unused_imports)] use crate as flat_serialize; #[derive(Clone, Debug)] pub struct Basic<'input> { pub header: u64, pub data_len: u32, pub array: [u16; 3], pub data: >::SLICE, pub data2: <[u8; 2] as flat_serialize::FlatSerializable<'input>>::SLICE, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % <[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(<[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; min_align = match <[u16; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as 
usize]; if ::REQUIRED_ALIGNMENT < min_align { min_align = ::REQUIRED_ALIGNMENT } min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % <[u8; 2] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(<[u8; 2] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; if <[u8; 2] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT < min_align { min_align = <[u8; 2] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT } min_align = match <[u8; 2] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; }; const _: () = { fn header<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = header::; fn data_len<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data_len::; fn array<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = array::<[u16; 3]>; fn data<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data::; fn data2<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data2::<[u8; 2]>; }; unsafe impl<'input> flat_serialize::FlatSerializable<'input> for Basic<'input> { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment = 1; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = <[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = <[u8; 2] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = 
alignment; } required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::align_of; let mut min_align: Option = None; let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = <[u16; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = { Some(::REQUIRED_ALIGNMENT) }; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = { Some(<[u8; 2] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT) }; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } match min_align { None => None, Some(min_align) => { let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } } } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size = 0; size += ::MIN_LEN; size += ::MIN_LEN; size += <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; size += 0; size += 0; size }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'input, Basic<'input>>; #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref( mut input: &'input [u8], ) -> 
Result<(Self, &'input [u8]), flat_serialize::WrapErr> { if input.len() < Self::MIN_LEN { return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN)); } let __packet_macro_read_len = 0usize; let mut header: Option = None; let mut data_len: Option = None; let mut array: Option<[u16; 3]> = None; let mut data: Option<>::SLICE> = None; let mut data2: Option<<[u8; 2] as flat_serialize::FlatSerializable<'_>>::SLICE> = None; 'tryref: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; header = Some(field); } { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; data_len = Some(field); } { let (field, rem) = match <[u16; 3]>::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; array = Some(field); } { let count = (data_len.clone().unwrap()) as usize; let (field, rem) = match <_ as flat_serialize::Slice<'_>>::try_ref(input, count) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; data = Some(field); } { let count = (data_len.clone().unwrap() / 3) as usize; let (field, rem) = match <_ as flat_serialize::Slice<'_>>::try_ref(input, count) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) 
=> break 'tryref, }; input = rem; data2 = Some(field); } let _ref = Basic { header: header.unwrap(), data_len: data_len.unwrap(), array: array.unwrap(), data: data.unwrap(), data2: data2.unwrap(), }; return Ok((_ref, input)); } Err(flat_serialize::WrapErr::NotEnoughBytes( 0 + ::MIN_LEN + ::MIN_LEN + <[u16; 3]>::MIN_LEN + (|| { ::MIN_LEN * (match data_len { Some(data_len) => data_len, None => return 0usize, }) as usize })() + (|| { <[u8; 2]>::MIN_LEN * (match data_len { Some(data_len) => data_len, None => return 0usize, } / 3) as usize })(), )) } #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); let Basic { header, data_len, array, data, data2, } = self; unsafe { input = header.fill_slice(input); }; unsafe { input = data_len.fill_slice(input); }; unsafe { input = array.fill_slice(input); }; unsafe { let count = (*data_len) as usize; input = <_ as flat_serialize::Slice<'_>>::fill_slice(data, count, input); }; unsafe { let count = ((*data_len) / 3) as usize; input = <_ as flat_serialize::Slice<'_>>::fill_slice(data2, count, input); } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] #[inline(always)] fn len(&self) -> usize { let Basic { header, data_len, array, data, data2, } = self; 0usize + ::len(header) + ::len(data_len) + <[u16; 3] as flat_serialize::FlatSerializable>::len(array) + (<_ as flat_serialize::Slice<'_>>::len(data, (*data_len) as usize)) + (<_ as flat_serialize::Slice<'_>>::len(data2, ((*data_len) / 3) as usize)) } } #[derive(Clone, Debug, PartialEq, Eq)] pub struct Optional { pub header: u64, pub optional_field: Option, pub non_optional_field: u16, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = 
[()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; if ::REQUIRED_ALIGNMENT < min_align { min_align = ::REQUIRED_ALIGNMENT } min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; }; const _: () = { fn header<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = header::; fn optional_field<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = optional_field::; fn non_optional_field<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = non_optional_field::; }; unsafe impl<'a> flat_serialize::FlatSerializable<'a> for Optional { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment = 1; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::align_of; let mut min_align: Option = None; let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align 
= Some(align), _ => (), } let ty_align = { let ty_provied = ::MAX_PROVIDED_ALIGNMENT; match ty_provied { Some(align) => Some(align), None => Some(::REQUIRED_ALIGNMENT), } }; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } match min_align { None => None, Some(min_align) => { let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } } } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size = 0; size += ::MIN_LEN; size += 0; size += ::MIN_LEN; size }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'a, Optional>; #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref(mut input: &[u8]) -> Result<(Self, &[u8]), flat_serialize::WrapErr> { if input.len() < Self::MIN_LEN { return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN)); } let __packet_macro_read_len = 0usize; let mut header: Option = None; let mut optional_field: Option = None; let mut non_optional_field: Option = None; 'tryref: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; header = Some(field); } if header.clone().unwrap() != 1 { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) 
=> break 'tryref, }; input = rem; optional_field = Some(field); } { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; non_optional_field = Some(field); } let _ref = Optional { header: header.unwrap(), optional_field: optional_field, non_optional_field: non_optional_field.unwrap(), }; return Ok((_ref, input)); } Err(flat_serialize::WrapErr::NotEnoughBytes( 0 + ::MIN_LEN + (|| { if match header { Some(header) => header, None => return 0usize, } != 1 { ::MIN_LEN } else { 0 } })() + ::MIN_LEN, )) } #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); let Optional { header, optional_field, non_optional_field, } = self; unsafe { input = header.fill_slice(input); }; unsafe { if (*header) != 1 { let optional_field: &u32 = optional_field.as_ref().unwrap(); input = optional_field.fill_slice(input); } }; unsafe { input = non_optional_field.fill_slice(input); } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] #[inline(always)] fn len(&self) -> usize { let Optional { header, optional_field, non_optional_field, } = self; 0usize + ::len(header) + (if (*header) != 1 { ::len(optional_field.as_ref().unwrap()) } else { 0 }) + ::len(non_optional_field) } } #[derive(Clone, Debug)] pub struct Nested<'a> { pub prefix: u64, pub basic: Basic<'a>, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += 
::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; }; const _: () = { fn prefix<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = prefix::; fn basic<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = basic::>; }; unsafe impl<'a> flat_serialize::FlatSerializable<'a> for Nested<'a> { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment = 1; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::align_of; let mut min_align: Option = None; let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } match min_align { None => None, Some(min_align) => { let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } } } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size = 0; size += ::MIN_LEN; size += ::MIN_LEN; size }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'a, Nested<'a>>; 
#[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref(mut input: &'a [u8]) -> Result<(Self, &'a [u8]), flat_serialize::WrapErr> { if input.len() < Self::MIN_LEN { return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN)); } let __packet_macro_read_len = 0usize; let mut prefix: Option = None; let mut basic: Option> = None; 'tryref: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; prefix = Some(field); } { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; basic = Some(field); } let _ref = Nested { prefix: prefix.unwrap(), basic: basic.unwrap(), }; return Ok((_ref, input)); } Err(flat_serialize::WrapErr::NotEnoughBytes( 0 + ::MIN_LEN + ::MIN_LEN, )) } #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); let Nested { prefix, basic } = self; unsafe { input = prefix.fill_slice(input); }; unsafe { input = basic.fill_slice(input); } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] #[inline(always)] fn len(&self) -> usize { let Nested { prefix, basic } = self; 0usize + ::len(prefix) + ::len(basic) } } #[derive(Clone, Debug)] pub struct NestedOptional { pub present: u64, pub val: Option, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let 
_alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; if ::REQUIRED_ALIGNMENT < min_align { min_align = ::REQUIRED_ALIGNMENT } min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; }; const _: () = { fn present<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = present::; fn val<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = val::; }; unsafe impl<'a> flat_serialize::FlatSerializable<'a> for NestedOptional { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment = 1; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::align_of; let mut min_align: Option = None; let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = { let ty_provied = ::MAX_PROVIDED_ALIGNMENT; match ty_provied { Some(align) => Some(align), None => Some(::REQUIRED_ALIGNMENT), } }; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } match min_align { None => None, Some(min_align) => { let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && 
min_align >= 2 { Some(2) } else { Some(1) } } } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size = 0; size += ::MIN_LEN; size += 0; size }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'a, NestedOptional>; #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref(mut input: &[u8]) -> Result<(Self, &[u8]), flat_serialize::WrapErr> { if input.len() < Self::MIN_LEN { return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN)); } let __packet_macro_read_len = 0usize; let mut present: Option = None; let mut val: Option = None; 'tryref: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; present = Some(field); } if present.clone().unwrap() > 2 { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) 
=> break 'tryref, }; input = rem; val = Some(field); } let _ref = NestedOptional { present: present.unwrap(), val: val, }; return Ok((_ref, input)); } Err(flat_serialize::WrapErr::NotEnoughBytes( 0 + ::MIN_LEN + (|| { if match present { Some(present) => present, None => return 0usize, } > 2 { ::MIN_LEN } else { 0 } })(), )) } #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); let NestedOptional { present, val } = self; unsafe { input = present.fill_slice(input); }; unsafe { if (*present) > 2 { let val: &Optional = val.as_ref().unwrap(); input = val.fill_slice(input); } } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] #[inline(always)] fn len(&self) -> usize { let NestedOptional { present, val } = self; 0usize + ::len(present) + (if (*present) > 2 { ::len(val.as_ref().unwrap()) } else { 0 }) } } #[derive(Clone, Debug)] pub struct NestedSlice<'b> { pub num_vals: u64, pub vals: >::SLICE, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; if ::REQUIRED_ALIGNMENT < min_align { min_align = ::REQUIRED_ALIGNMENT } min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; }; const _: () = { fn num_vals<'test, T: flat_serialize::FlatSerializable<'test>>() {} 
let _ = num_vals::; fn vals<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = vals::; }; unsafe impl<'b> flat_serialize::FlatSerializable<'b> for NestedSlice<'b> { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment = 1; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::align_of; let mut min_align: Option = None; let ty_align = ::MAX_PROVIDED_ALIGNMENT; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let ty_align = { Some(::REQUIRED_ALIGNMENT) }; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } match min_align { None => None, Some(min_align) => { let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } } } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size = 0; size += ::MIN_LEN; size += 0; size }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'b, NestedSlice<'b>>; #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref(mut input: &'b [u8]) -> Result<(Self, &'b [u8]), flat_serialize::WrapErr> { if input.len() < Self::MIN_LEN { return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN)); } let __packet_macro_read_len = 0usize; let mut num_vals: Option = None; let mut vals: Option<>::SLICE> = None; 'tryref: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), 
Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; num_vals = Some(field); } { let count = (num_vals.clone().unwrap()) as usize; let (field, rem) = match <_ as flat_serialize::Slice<'_>>::try_ref(input, count) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref, }; input = rem; vals = Some(field); } let _ref = NestedSlice { num_vals: num_vals.unwrap(), vals: vals.unwrap(), }; return Ok((_ref, input)); } Err(flat_serialize::WrapErr::NotEnoughBytes( 0 + ::MIN_LEN + (|| { ::MIN_LEN * (match num_vals { Some(num_vals) => num_vals, None => return 0usize, }) as usize })(), )) } #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); let NestedSlice { num_vals, vals } = self; unsafe { input = num_vals.fill_slice(input); }; unsafe { let count = (*num_vals) as usize; input = <_ as flat_serialize::Slice<'_>>::fill_slice(vals, count, input); } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] #[inline(always)] fn len(&self) -> usize { let NestedSlice { num_vals, vals } = self; 0usize + ::len(num_vals) + (<_ as flat_serialize::Slice<'_>>::len(vals, (*num_vals) as usize)) } } #[derive(Clone, Debug)] pub enum BasicEnum<'input> { First { data_len: u32, data: >::SLICE, }, Fixed { array: [u16; 3], }, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()][(::REQUIRED_ALIGNMENT > min_align) as u8 as 
usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; { use std::mem::{align_of, size_of}; let mut current_size = current_size; let mut min_align = min_align; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; if ::REQUIRED_ALIGNMENT < min_align { min_align = ::REQUIRED_ALIGNMENT } min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; } { use std::mem::{align_of, size_of}; let mut current_size = current_size; let mut min_align = min_align; let _alignment_check: () = [()] [(current_size) % <[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(<[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; min_align = match <[u16; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; } }; const _: () = { #[allow(dead_code)] enum UniquenessCheck { First = 2, Fixed = 3, } }; const _: () = { fn k<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = k::; const _: () = { const _: () = { fn data_len<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data_len::; fn data<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data::; }; }; const _: () = { const _: () = { fn array<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = array::<[u16; 3]>; }; }; }; unsafe 
impl<'input> flat_serialize::FlatSerializable<'input> for BasicEnum<'input> { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment: usize = ::REQUIRED_ALIGNMENT; let alignment: usize = { let mut required_alignment = ::REQUIRED_ALIGNMENT; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; if alignment > required_alignment { required_alignment = alignment; } let alignment: usize = { let mut required_alignment = ::REQUIRED_ALIGNMENT; let alignment = <[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::{align_of, size_of}; let mut min_align: usize = match match ::MAX_PROVIDED_ALIGNMENT { Some(a) => Some(a), None => Some(8), } { None => 8, Some(align) => align, }; let variant_alignment: usize = { let mut min_align: Option = match ::MAX_PROVIDED_ALIGNMENT { Some(a) => Some(a), None => Some(8), }; let alignment = ::MAX_PROVIDED_ALIGNMENT; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let alignment = { Some(::REQUIRED_ALIGNMENT) }; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let variant_size: usize = ::MIN_LEN + ::MIN_LEN + 0; let effective_alignment = match min_align { Some(align) => align, None => 8, }; if variant_size % 8 == 0 && effective_alignment >= 8 { 8 } else if variant_size % 4 == 0 && effective_alignment >= 4 { 4 } else if variant_size % 2 
== 0 && effective_alignment >= 2 { 2 } else { 1 } }; if variant_alignment < min_align { min_align = variant_alignment } let variant_alignment: usize = { let mut min_align: Option = match ::MAX_PROVIDED_ALIGNMENT { Some(a) => Some(a), None => Some(8), }; let alignment = <[u16; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let variant_size: usize = ::MIN_LEN + <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; let effective_alignment = match min_align { Some(align) => align, None => 8, }; if variant_size % 8 == 0 && effective_alignment >= 8 { 8 } else if variant_size % 4 == 0 && effective_alignment >= 4 { 4 } else if variant_size % 2 == 0 && effective_alignment >= 2 { 2 } else { 1 } }; if variant_alignment < min_align { min_align = variant_alignment } let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size: Option = None; let variant_size = { let mut size: usize = ::MIN_LEN; size += ::MIN_LEN; size += 0; size }; size = match size { None => Some(variant_size), Some(size) if size > variant_size => Some(variant_size), Some(size) => Some(size), }; let variant_size = { let mut size: usize = ::MIN_LEN; size += <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; size }; size = match size { None => Some(variant_size), Some(size) if size > variant_size => Some(variant_size), Some(size) => Some(size), }; match size { Some(size) => size, None => ::MIN_LEN, } }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'input, BasicEnum<'input>>; #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref( mut input: &'input 
[u8], ) -> Result<(Self, &'input [u8]), flat_serialize::WrapErr> { let __packet_macro_read_len = 0usize; let mut k = None; 'tryref_tag: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_tag, }; input = rem; k = Some(field); }; match k { Some(2) => { let mut data_len: Option = None; let mut data: Option<>::SLICE> = None; 'tryref_0: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_0, }; input = rem; data_len = Some(field); } { let count = (data_len.clone().unwrap()) as usize; let (field, rem) = match <_ as flat_serialize::Slice<'_>>::try_ref(input, count) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_0, }; input = rem; data = Some(field); } let _ref = BasicEnum::First { data_len: data_len.unwrap(), data: data.unwrap(), }; return Ok((_ref, input)); } return Err(flat_serialize::WrapErr::NotEnoughBytes( std::mem::size_of::() + ::MIN_LEN + (|| { ::MIN_LEN * (match data_len { Some(data_len) => data_len, None => return 0usize, }) as usize })(), )); } Some(3) => { let mut array: Option<[u16; 3]> = None; 'tryref_1: loop { { let (field, rem) = match <[u16; 3]>::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) 
=> break 'tryref_1, }; input = rem; array = Some(field); } let _ref = BasicEnum::Fixed { array: array.unwrap(), }; return Ok((_ref, input)); } return Err(flat_serialize::WrapErr::NotEnoughBytes( std::mem::size_of::() + <[u16; 3]>::MIN_LEN, )); } _ => return Err(flat_serialize::WrapErr::InvalidTag(0)), } } Err(flat_serialize::WrapErr::NotEnoughBytes( ::std::mem::size_of::(), )) } #[allow(unused_assignments, unused_variables)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); match self { BasicEnum::First { data_len, data } => { let k: &u64 = &2; unsafe { input = k.fill_slice(input); } unsafe { input = data_len.fill_slice(input); }; unsafe { let count = (*data_len) as usize; input = <_ as flat_serialize::Slice<'_>>::fill_slice(data, count, input); } } BasicEnum::Fixed { array } => { let k: &u64 = &3; unsafe { input = k.fill_slice(input); } unsafe { input = array.fill_slice(input); } } } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] fn len(&self) -> usize { match self { BasicEnum::First { data_len, data } => { ::std::mem::size_of::() + ::len(data_len) + (<_ as flat_serialize::Slice<'_>>::len(data, (*data_len) as usize)) } BasicEnum::Fixed { array } => { ::std::mem::size_of::() + <[u16; 3] as flat_serialize::FlatSerializable>::len(array) } } } } #[derive(Clone, Debug)] pub enum PaddedEnum<'input> { First { padding: [u8; 3], data_len: u32, data: >::SLICE, }, Fixed { padding: u8, array: [u16; 3], }, } #[allow(unused_assignments)] const _: () = { use std::mem::{align_of, size_of}; let mut current_size = 0; let mut min_align = 8; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < 
min_align => align, _ => min_align, }; { use std::mem::{align_of, size_of}; let mut current_size = current_size; let mut min_align = min_align; let _alignment_check: () = [()] [(current_size) % <[u8; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(<[u8; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += <[u8; 3] as flat_serialize::FlatSerializable>::MIN_LEN; min_align = match <[u8; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; if ::REQUIRED_ALIGNMENT < min_align { min_align = ::REQUIRED_ALIGNMENT } min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; } { use std::mem::{align_of, size_of}; let mut current_size = current_size; let mut min_align = min_align; let _alignment_check: () = [()][(current_size) % ::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += ::MIN_LEN; min_align = match ::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; let _alignment_check: () = [()] [(current_size) % <[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT]; let _alignment_check2: () = [()] [(<[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > min_align) as u8 as usize]; current_size += <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; min_align = match <[u16; 3] 
as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT { Some(align) if align < min_align => align, _ => min_align, }; } }; const _: () = { #[allow(dead_code)] enum UniquenessCheck { First = 2, Fixed = 3, } }; const _: () = { fn k<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = k::; const _: () = { const _: () = { fn padding<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = padding::<[u8; 3]>; fn data_len<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data_len::; fn data<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = data::; }; }; const _: () = { const _: () = { fn padding<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = padding::; fn array<'test, T: flat_serialize::FlatSerializable<'test>>() {} let _ = array::<[u16; 3]>; }; }; }; unsafe impl<'input> flat_serialize::FlatSerializable<'input> for PaddedEnum<'input> { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment: usize = ::REQUIRED_ALIGNMENT; let alignment: usize = { let mut required_alignment = ::REQUIRED_ALIGNMENT; let alignment = <[u8; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; if alignment > required_alignment { required_alignment = alignment; } let alignment: usize = { let mut required_alignment = ::REQUIRED_ALIGNMENT; let alignment = ::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } let alignment = <[u16; 3] as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT; if alignment > required_alignment { required_alignment = alignment; } required_alignment }; if alignment > required_alignment { required_alignment = alignment; } 
required_alignment }; const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::{align_of, size_of}; let mut min_align: usize = match match ::MAX_PROVIDED_ALIGNMENT { Some(a) => Some(a), None => Some(8), } { None => 8, Some(align) => align, }; let variant_alignment: usize = { let mut min_align: Option = match ::MAX_PROVIDED_ALIGNMENT { Some(a) => Some(a), None => Some(8), }; let alignment = <[u8; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let alignment = ::MAX_PROVIDED_ALIGNMENT; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let alignment = { Some(::REQUIRED_ALIGNMENT) }; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let variant_size: usize = ::MIN_LEN + <[u8; 3] as flat_serialize::FlatSerializable>::MIN_LEN + ::MIN_LEN + 0; let effective_alignment = match min_align { Some(align) => align, None => 8, }; if variant_size % 8 == 0 && effective_alignment >= 8 { 8 } else if variant_size % 4 == 0 && effective_alignment >= 4 { 4 } else if variant_size % 2 == 0 && effective_alignment >= 2 { 2 } else { 1 } }; if variant_alignment < min_align { min_align = variant_alignment } let variant_alignment: usize = { let mut min_align: Option = match ::MAX_PROVIDED_ALIGNMENT { Some(a) => Some(a), None => Some(8), }; let alignment = ::MAX_PROVIDED_ALIGNMENT; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let alignment = <[u16; 3] as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT; match (alignment, 
min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } let variant_size: usize = ::MIN_LEN + ::MIN_LEN + <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; let effective_alignment = match min_align { Some(align) => align, None => 8, }; if variant_size % 8 == 0 && effective_alignment >= 8 { 8 } else if variant_size % 4 == 0 && effective_alignment >= 4 { 4 } else if variant_size % 2 == 0 && effective_alignment >= 2 { 2 } else { 1 } }; if variant_alignment < min_align { min_align = variant_alignment } let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } }; const MIN_LEN: usize = { use std::mem::size_of; let mut size: Option = None; let variant_size = { let mut size: usize = ::MIN_LEN; size += <[u8; 3] as flat_serialize::FlatSerializable>::MIN_LEN; size += ::MIN_LEN; size += 0; size }; size = match size { None => Some(variant_size), Some(size) if size > variant_size => Some(variant_size), Some(size) => Some(size), }; let variant_size = { let mut size: usize = ::MIN_LEN; size += ::MIN_LEN; size += <[u16; 3] as flat_serialize::FlatSerializable>::MIN_LEN; size }; size = match size { None => Some(variant_size), Some(size) if size > variant_size => Some(variant_size), Some(size) => Some(size), }; match size { Some(size) => size, None => ::MIN_LEN, } }; const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Iterable<'input, PaddedEnum<'input>>; #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref( mut input: &'input [u8], ) -> Result<(Self, &'input [u8]), flat_serialize::WrapErr> { let __packet_macro_read_len = 0usize; let mut k = None; 'tryref_tag: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { 
return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_tag, }; input = rem; k = Some(field); }; match k { Some(2) => { let mut padding: Option<[u8; 3]> = None; let mut data_len: Option = None; let mut data: Option<>::SLICE> = None; 'tryref_0: loop { { let (field, rem) = match <[u8; 3]>::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_0, }; input = rem; padding = Some(field); } { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_0, }; input = rem; data_len = Some(field); } { let count = (data_len.clone().unwrap()) as usize; let (field, rem) = match <_ as flat_serialize::Slice<'_>>::try_ref(input, count) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_0, }; input = rem; data = Some(field); } let _ref = PaddedEnum::First { padding: padding.unwrap(), data_len: data_len.unwrap(), data: data.unwrap(), }; return Ok((_ref, input)); } return Err(flat_serialize::WrapErr::NotEnoughBytes( std::mem::size_of::() + <[u8; 3]>::MIN_LEN + ::MIN_LEN + (|| { ::MIN_LEN * (match data_len { Some(data_len) => data_len, None => return 0usize, }) as usize })(), )); } Some(3) => { let mut padding: Option = None; let mut array: Option<[u16; 3]> = None; 'tryref_1: loop { { let (field, rem) = match ::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) 
=> break 'tryref_1, }; input = rem; padding = Some(field); } { let (field, rem) = match <[u16; 3]>::try_ref(input) { Ok((f, b)) => (f, b), Err(flat_serialize::WrapErr::InvalidTag(offset)) => { return Err(flat_serialize::WrapErr::InvalidTag( __packet_macro_read_len + offset, )) } Err(..) => break 'tryref_1, }; input = rem; array = Some(field); } let _ref = PaddedEnum::Fixed { padding: padding.unwrap(), array: array.unwrap(), }; return Ok((_ref, input)); } return Err(flat_serialize::WrapErr::NotEnoughBytes( std::mem::size_of::() + ::MIN_LEN + <[u16; 3]>::MIN_LEN, )); } _ => return Err(flat_serialize::WrapErr::InvalidTag(0)), } } Err(flat_serialize::WrapErr::NotEnoughBytes( ::std::mem::size_of::(), )) } #[allow(unused_assignments, unused_variables)] unsafe fn fill_slice<'out>( &self, input: &'out mut [std::mem::MaybeUninit], ) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.len(); let (mut input, rem) = input.split_at_mut(total_len); match self { PaddedEnum::First { padding, data_len, data, } => { let k: &u8 = &2; unsafe { input = k.fill_slice(input); } unsafe { input = padding.fill_slice(input); }; unsafe { input = data_len.fill_slice(input); }; unsafe { let count = (*data_len) as usize; input = <_ as flat_serialize::Slice<'_>>::fill_slice(data, count, input); } } PaddedEnum::Fixed { padding, array } => { let k: &u8 = &3; unsafe { input = k.fill_slice(input); } unsafe { input = padding.fill_slice(input); }; unsafe { input = array.fill_slice(input); } } } debug_assert_eq!(input.len(), 0); rem } #[allow(unused_assignments, unused_variables)] fn len(&self) -> usize { match self { PaddedEnum::First { padding, data_len, data, } => { ::std::mem::size_of::() + <[u8; 3] as flat_serialize::FlatSerializable>::len(padding) + ::len(data_len) + (<_ as flat_serialize::Slice<'_>>::len(data, (*data_len) as usize)) } PaddedEnum::Fixed { padding, array } => { ::std::mem::size_of::() + ::len(padding) + <[u16; 3] as flat_serialize::FlatSerializable>::len(array) } } } } 
================================================ FILE: crates/flat_serialize/flat_serialize/Cargo.toml ================================================
[package]
name = "flat_serialize"
version = "0.1.0"
authors = ["Joshua Lockerman"]
edition = "2021"

[dependencies]
# ordered-float provides OrderedFloat<f32>/<f64>, which lib.rs makes FlatSerializable
ordered-float = "1.0"
serde = "1.0"

[dev-dependencies]
# the proc-macro crate is only needed by the tests in src/lib.rs
flat_serialize_macro = {path="../flat_serialize_macro"}

================================================ FILE: crates/flat_serialize/flat_serialize/src/lib.rs ================================================
use std::{
    fmt,
    marker::PhantomData,
    mem::{align_of, size_of, MaybeUninit},
    slice,
};

// Error returned by the fallible deserialization entry points (`try_ref`).
// Both variants carry a byte offset/length to aid diagnostics:
//   - NotEnoughBytes(n): at least `n` bytes were needed but the input was shorter
//   - InvalidTag(off): an enum discriminant at offset `off` matched no variant
#[derive(Debug)]
pub enum WrapErr {
    NotEnoughBytes(usize),
    InvalidTag(usize),
}

/// Trait marking that a type can be translated to and from a flat buffer
/// without copying or allocation.
///
/// # Safety
/// For a type to be `FlatSerializable` it must contain no pointers, have no
/// interior padding, must have a `size >= alignment` and must have
/// `size % align = 0`. In general this should not be implemented manually, and
/// you should only use `#[derive(FlatSerializable)]` or `flat_serialize!{}` to
/// implement this.
/// **NOTE** we currently allow types with invalid bit patterns, such as `bool`
/// to be `FlatSerializable` making this trait inappropriate to use on untrusted
/// input.
/// Core serialization trait: see the safety notes on the doc comment above.
///
/// NOTE(review): the original extraction stripped many generic arguments
/// (`Option<usize>`, `Vec<u8>`, `MaybeUninit<u8>`, turbofish types, …); they
/// are restored here to match how the items are used throughout this file.
pub unsafe trait FlatSerializable<'input>: Sized + 'input {
    /// Minimum number of bytes a serialized value can occupy.
    const MIN_LEN: usize;
    /// Alignment the input buffer must satisfy for `try_ref`.
    const REQUIRED_ALIGNMENT: usize;
    /// Greatest alignment a value of this type can guarantee for whatever
    /// follows it in a buffer; `None` means "same as REQUIRED_ALIGNMENT".
    const MAX_PROVIDED_ALIGNMENT: Option<usize>;
    /// `true` when the type can be memcpy'd (fixed size, no variable parts).
    const TRIVIAL_COPY: bool = false;
    type SLICE;
    type OWNED: 'static;

    /// Reinterpret the front of `input` as a value, returning it and the
    /// remaining bytes.
    #[allow(clippy::missing_safety_doc)]
    unsafe fn try_ref(input: &'input [u8]) -> Result<(Self, &'input [u8]), WrapErr>;

    /// Append the serialized form of `self` to `input`.
    fn fill_vec(&self, input: &mut Vec<u8>) {
        let start = input.len();
        let my_len = self.num_bytes();
        input.reserve(my_len);
        // simulate unstable spare_capacity_mut()
        let slice = unsafe {
            slice::from_raw_parts_mut(
                input.as_mut_ptr().add(input.len()) as *mut MaybeUninit<u8>,
                my_len,
            )
        };
        let rem = unsafe { self.fill_slice(slice) };
        debug_assert_eq!(rem.len(), 0);
        // SAFETY: fill_slice initialized exactly `my_len` bytes above.
        unsafe {
            input.set_len(start + my_len);
        }
    }

    /// Write the serialized form of `self` into the front of `input`,
    /// returning the unwritten tail.
    #[must_use]
    #[allow(clippy::missing_safety_doc)]
    unsafe fn fill_slice<'out>(
        &self,
        input: &'out mut [MaybeUninit<u8>],
    ) -> &'out mut [MaybeUninit<u8>];

    /// Exact number of bytes `self` serializes to.
    fn num_bytes(&self) -> usize;

    /// Convert any borrowed internals to owned storage in place.
    fn make_owned(&mut self);
    /// Consume `self`, producing a fully owned (`'static`) equivalent.
    fn into_owned(self) -> Self::OWNED;
}

/// Implements `FlatSerializable` for primitive (memcpy-able) types.
#[macro_export]
macro_rules! impl_flat_serializable {
    ($($typ:ty)+) => {
        $(
            unsafe impl<'i> FlatSerializable<'i> for $typ {
                const MIN_LEN: usize = size_of::<Self>();
                const REQUIRED_ALIGNMENT: usize = align_of::<Self>();
                const MAX_PROVIDED_ALIGNMENT: Option<usize> = None;
                const TRIVIAL_COPY: bool = true;
                type SLICE = $crate::Slice<'i, $typ>;
                type OWNED = Self;

                #[inline(always)]
                unsafe fn try_ref(input: &'i [u8]) -> Result<(Self, &'i [u8]), WrapErr> {
                    let size = size_of::<Self>();
                    if input.len() < size {
                        return Err(WrapErr::NotEnoughBytes(size))
                    }
                    let (field, rem) = input.split_at(size);
                    let field = field.as_ptr().cast::<Self>();
                    Ok((field.read_unaligned(), rem))
                }

                #[inline(always)]
                unsafe fn fill_slice<'out>(
                    &self,
                    input: &'out mut [MaybeUninit<u8>],
                ) -> &'out mut [MaybeUninit<u8>] {
                    let size = size_of::<Self>();
                    let (input, rem) = input.split_at_mut(size);
                    let bytes = (self as *const Self).cast::<MaybeUninit<u8>>();
                    let bytes = slice::from_raw_parts(bytes, size);
                    // emulate write_slice_cloned()
                    // for i in 0..size {
                    //     input[i] = MaybeUninit::new(bytes[i])
                    // }
                    input.copy_from_slice(bytes);
                    rem
                }

                #[inline(always)]
                fn num_bytes(&self) -> usize {
                    size_of::<Self>()
                }

                #[inline(always)]
                fn make_owned(&mut self) {
                    // nop
                }

                #[inline(always)]
                fn into_owned(self) -> Self::OWNED {
                    self
                }
            }
        )+
    };
}

impl_flat_serializable!(bool);
impl_flat_serializable!(i8 u8 i16 u16 i32 u32 i64 u64 i128 u128);
impl_flat_serializable!(f32 f64 ordered_float::OrderedFloat<f32> ordered_float::OrderedFloat<f64>);

// TODO ensure perf
unsafe impl<'i, T, const N: usize> FlatSerializable<'i> for [T; N]
where
    T: FlatSerializable<'i> + 'i,
{
    const MIN_LEN: usize = { T::MIN_LEN * N };
    const REQUIRED_ALIGNMENT: usize = T::REQUIRED_ALIGNMENT;
    const MAX_PROVIDED_ALIGNMENT: Option<usize> = T::MAX_PROVIDED_ALIGNMENT;
    const TRIVIAL_COPY: bool = T::TRIVIAL_COPY;
    // FIXME ensure no padding
    type SLICE = Slice<'i, [T; N]>;
    type OWNED = [T::OWNED; N];

    #[inline(always)]
    unsafe fn try_ref(mut input: &'i [u8]) -> Result<(Self, &'i [u8]), WrapErr> {
        // TODO can we simplify based on T::TRIVIAL_COPY?
        if T::TRIVIAL_COPY && input.len() < (T::MIN_LEN * N) {
            return Err(WrapErr::NotEnoughBytes(T::MIN_LEN * N));
        }
        // SAFETY: an array of MaybeUninit needs no initialization.
        let mut output: [MaybeUninit<T>; N] = MaybeUninit::uninit().assume_init();
        for item in output.iter_mut() {
            let (val, rem) = T::try_ref(input)?;
            *item = MaybeUninit::new(val);
            input = rem;
        }
        // SAFETY: every element was initialized by the loop above.
        let output = (&output as *const [MaybeUninit<T>; N])
            .cast::<[T; N]>()
            .read();
        Ok((output, input))
    }

    #[inline(always)]
    unsafe fn fill_slice<'out>(
        &self,
        input: &'out mut [MaybeUninit<u8>],
    ) -> &'out mut [MaybeUninit<u8>] {
        // FIX(review): the non-trivial-copy arm previously used `self.len()`
        // (the element count N), not the serialized byte length; that would
        // split the buffer at the wrong offset whenever size_of::<T>() != 1.
        // `self.num_bytes()` is the byte length, matching the
        // `debug_assert_eq!(input.len(), 0)` invariant below.
        let size = if Self::TRIVIAL_COPY {
            Self::MIN_LEN
        } else {
            self.num_bytes()
        };
        let (mut input, rem) = input.split_at_mut(size);
        input = &mut input[..size];
        // TODO is there a way to force a memcopy for trivial cases?
        for val in self {
            input = val.fill_slice(input);
        }
        debug_assert_eq!(input.len(), 0);
        rem
    }

    #[inline(always)]
    fn num_bytes(&self) -> usize {
        self.iter().map(T::num_bytes).sum()
    }

    fn make_owned(&mut self) {
        for val in self {
            val.make_owned()
        }
    }

    fn into_owned(self) -> Self::OWNED {
        // SAFETY: an array of MaybeUninit needs no initialization.
        let mut output: [MaybeUninit<T::OWNED>; N] = unsafe { MaybeUninit::uninit().assume_init() };
        for (i, t) in self.into_iter().map(|s| s.into_owned()).enumerate() {
            output[i] = MaybeUninit::new(t)
        }
        // SAFETY: the loop above initialized all N elements.
        unsafe {
            (&mut output as *mut [MaybeUninit<T::OWNED>; N])
                .cast::<Self::OWNED>()
                .read()
        }
    }
}

/// A variable-length sequence inside a flat buffer. It may be lazily decoded
/// (`Iter`), borrowed directly when elements are trivially copyable (`Slice`),
/// or fully materialized (`Owned`).
pub enum Slice<'input, T: 'input> {
    Iter(Unflatten<'input, T>),
    Slice(&'input [T]),
    Owned(Vec<T>),
}

impl<'input, T: 'input> Slice<'input, T> {
    pub fn iter<'s>(&'s self) -> Iter<'input, 's, T>
    where
        'input: 's,
    {
        match self {
            Slice::Iter(iter) => Iter::Unflatten(*iter),
            Slice::Slice(slice) => Iter::Slice(slice),
            Slice::Owned(vec) => Iter::Slice(vec),
        }
    }
}

impl<'input, T> std::iter::IntoIterator for Slice<'input, T>
where
    T: FlatSerializable<'input> + Clone,
{
    type Item = T;
    type IntoIter = Iter<'input, 'input, T>;
    fn into_iter(self) -> Self::IntoIter {
        match self {
            Slice::Iter(iter) => Iter::Unflatten(iter),
            Slice::Slice(slice) => Iter::Slice(slice),
            Slice::Owned(vec) => Iter::Owned(vec.into_iter()),
        }
    }
}

/// Iterator over a `Slice`, mirroring its three representations.
pub enum Iter<'input, 'borrow, T: 'input> {
    Unflatten(Unflatten<'input, T>),
    Slice(&'borrow [T]),
    Owned(std::vec::IntoIter<T>),
}

impl<'input, T: 'input> Iterator for Iter<'input, '_, T>
where
    T: FlatSerializable<'input> + Clone,
{
    type Item = T;
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::Unflatten(i) => {
                if i.slice.is_empty() {
                    return None;
                }
                let (val, rem) = unsafe { <T as FlatSerializable>::try_ref(i.slice).unwrap() };
                // skip inter-element padding so the next element is aligned
                let additional_len = aligning_len(rem.as_ptr() as _, T::REQUIRED_ALIGNMENT);
                i.slice = &rem[additional_len..];
                Some(val)
            }
            Self::Slice(s) => {
                let val = s.first().cloned();
                if val.is_some() {
                    *s = &s[1..]
                }
                val
            }
            Self::Owned(i) => i.next(),
        }
    }

    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        match self {
            Self::Unflatten(i) => {
                // elements are variable-size; must decode each one to skip it
                for _ in 0..n {
                    i.next()?;
                }
                i.next()
            }
            Self::Slice(s) => {
                *s = s.get(n..)?;
                self.next()
            }
            Self::Owned(i) => i.nth(n),
        }
    }
}

impl<'input, T: 'input> Iter<'input, '_, T>
where
    T: FlatSerializable<'input> + Clone,
{
    // NOTE: O(n) for the Unflatten representation (must walk the bytes).
    pub fn len(&self) -> usize {
        match self {
            Self::Unflatten(i) => (*i).count(),
            Self::Slice(s) => s.len(),
            Self::Owned(i) => i.as_slice().len(),
        }
    }

    pub fn is_empty(&self) -> bool {
        match self {
            Self::Unflatten(i) => (*i).count() == 0,
            Self::Slice(s) => s.is_empty(),
            Self::Owned(i) => i.as_slice().is_empty(),
        }
    }
}

impl<'i, T: 'i> fmt::Debug for Slice<'i, T>
where
    T: fmt::Debug + FlatSerializable<'i> + Clone,
{
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_list().entries(self.iter()).finish()
    }
}

impl<'i, T: 'i> PartialEq for Slice<'i, T>
where
    T: FlatSerializable<'i> + Clone + PartialEq,
{
    fn eq(&self, other: &Self) -> bool {
        // element-wise comparison regardless of representation
        self.iter().eq(other.iter())
    }
}

impl<'i, T: 'i> Eq for Slice<'i, T> where T: FlatSerializable<'i> + Clone + Eq {}

/// Lazily-decoded view over the raw bytes of a sequence of `T`.
#[derive(Debug)]
pub struct Unflatten<'input, T: 'input> {
    slice: &'input [u8],
    _pd: PhantomData<&'input T>,
}

impl<'input, T: 'input> Slice<'input, T> {
    #[allow(clippy::missing_safety_doc)]
    pub unsafe fn from_bytes(bytes: &'input [u8]) -> Self {
        Slice::Iter(Unflatten {
            slice: bytes,
            _pd: PhantomData,
        })
    }

    pub fn len(&self) -> usize
    where
        T: Clone + FlatSerializable<'input>,
    {
        match self {
            Slice::Iter(..) => self.iter().count(),
            Slice::Slice(s) => s.len(),
            Slice::Owned(o) => o.len(),
        }
    }

    pub fn is_empty(&self) -> bool
    where
        T: Clone + FlatSerializable<'input>,
    {
        match self {
            Slice::Iter(..) => self.iter().count() == 0,
            Slice::Slice(s) => s.is_empty(),
            Slice::Owned(o) => o.is_empty(),
        }
    }

    pub fn make_owned(&mut self)
    where
        T: Clone + FlatSerializable<'input>,
    {
        self.as_owned();
    }

    pub fn into_vec(self) -> Vec<T::OWNED>
    where
        T: Clone + FlatSerializable<'input>,
    {
        match self {
            Slice::Iter(_) => self.iter().map(|t| t.into_owned()).collect(),
            Slice::Slice(s) => s.iter().map(|t| t.clone().into_owned()).collect(),
            Slice::Owned(v) => v.into_iter().map(|t| t.into_owned()).collect(),
        }
    }

    pub fn into_owned(self) -> Slice<'static, T::OWNED>
    where
        T: Clone + FlatSerializable<'input>,
    {
        Slice::Owned(self.into_vec())
    }

    /// Force the `Owned` representation and return the backing vector.
    pub fn as_owned(&mut self) -> &mut Vec<T>
    where
        T: Clone + FlatSerializable<'input>,
    {
        match self {
            Slice::Iter(_) => {
                let vec = self.iter().collect();
                *self = Slice::Owned(vec);
            }
            Slice::Slice(s) => {
                *self = Slice::Owned(s.to_vec());
            }
            Slice::Owned(..) => (),
        }
        match self {
            Slice::Owned(vec) => vec,
            _ => unreachable!(),
        }
    }

    pub fn as_slice(&self) -> &[T]
    where
        T: Clone + FlatSerializable<'input>,
    {
        match self {
            Slice::Iter(_) => panic!("cannot convert iterator to slice without mutating"),
            Slice::Slice(s) => s,
            Slice::Owned(o) => o,
        }
    }

    pub fn slice(&self) -> &'input [T]
    where
        T: Clone + FlatSerializable<'input>,
    {
        match self {
            Slice::Slice(s) => s,
            _ => panic!("cannot convert to slice without mutating"),
        }
    }
}

impl<'input, T: 'input> Iterator for Unflatten<'input, T>
where
    T: FlatSerializable<'input>,
{
    type Item = T;
    fn next(&mut self) -> Option<Self::Item> {
        if self.slice.is_empty() {
            return None;
        }
        let (val, rem) = unsafe { <T as FlatSerializable>::try_ref(self.slice).unwrap() };
        self.slice = rem;
        Some(val)
    }
}

impl<'input, T: 'input> From<&'input [T]> for Slice<'input, T> {
    fn from(val: &'input [T]) -> Self {
        Self::Slice(val)
    }
}

impl<'input, T: 'input> From<Vec<T>> for Slice<'input, T> {
    fn from(val: Vec<T>) -> Self {
        Self::Owned(val)
    }
}

impl<'input, T: 'input> Clone for Slice<'input, T>
where
    T: Clone,
{
    fn clone(&self) -> Self {
        match self {
            Slice::Iter(i) => Slice::Iter(*i),
            Slice::Slice(s) => Slice::Slice(s),
            Slice::Owned(v) => Slice::Owned(Vec::clone(v)),
        }
    }
}

impl<'i, T> serde::Serialize for Slice<'i, T>
where
    T: serde::Serialize + Clone + FlatSerializable<'i>,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeSeq;
        let mut s = serializer.serialize_seq(Some(self.len()))?;
        for t in self.iter() {
            s.serialize_element(&t)?
        }
        s.end()
    }
}

impl<'de, T> serde::Deserialize<'de> for Slice<'_, T>
where
    T: serde::Deserialize<'de>,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // always deserializes into the Owned representation
        let v = Vec::deserialize(deserializer)?;
        Ok(Self::Owned(v))
    }
}

impl<'input, T: 'input> Clone for Unflatten<'input, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<'input, T: 'input> Copy for Unflatten<'input, T> {}

/// Serialization for length-prefixed sequences, where the element count comes
/// from another field rather than the data itself.
#[doc(hidden)]
pub unsafe trait VariableLen<'input>: Sized {
    #[allow(clippy::missing_safety_doc)]
    unsafe fn try_ref(input: &'input [u8], count: usize) -> Result<(Self, &'input [u8]), WrapErr>;

    #[must_use]
    #[allow(clippy::missing_safety_doc)]
    unsafe fn fill_slice<'out>(
        &self,
        count: usize,
        input: &'out mut [MaybeUninit<u8>],
    ) -> &'out mut [MaybeUninit<u8>];

    fn num_bytes(&self, count: usize) -> usize;
}

// Borrowed-slice representation: only valid for trivially copyable elements,
// since the bytes are reinterpreted in place.
unsafe impl<'i, T: 'i> VariableLen<'i> for &'i [T]
where
    T: FlatSerializable<'i>,
{
    #[inline(always)]
    unsafe fn try_ref(input: &'i [u8], count: usize) -> Result<(Self, &'i [u8]), WrapErr> {
        assert!(<T as FlatSerializable>::TRIVIAL_COPY);
        let byte_len = T::MIN_LEN * count;
        if input.len() < byte_len {
            return Err(WrapErr::NotEnoughBytes(byte_len));
        }
        let (bytes, rem) = input.split_at(byte_len);
        let bytes = bytes.as_ptr();
        let field = ::std::slice::from_raw_parts(bytes.cast::<T>(), count);
        debug_assert_eq!(
            bytes.add(byte_len) as usize,
            field.as_ptr().add(count) as usize
        );
        Ok((field, rem))
    }

    #[inline(always)]
    unsafe fn fill_slice<'out>(
        &self,
        count: usize,
        input: &'out mut [MaybeUninit<u8>],
    ) -> &'out mut [MaybeUninit<u8>] {
        assert!(<T as FlatSerializable>::TRIVIAL_COPY);
        if !<T as FlatSerializable>::TRIVIAL_COPY {
            return fill_slice_from_iter::<T, _, _>(self.iter(), count, input);
        }
        let vals = &self[..count];
        let size = <T as FlatSerializable>::MIN_LEN * vals.len();
        let (out, rem) = input.split_at_mut(size);
        let bytes = vals.as_ptr().cast::<MaybeUninit<u8>>();
        let bytes = std::slice::from_raw_parts(bytes, size);
        out.copy_from_slice(bytes);
        rem
    }

    #[inline(always)]
    fn num_bytes(&self, count: usize) -> usize {
        assert!(<T as FlatSerializable>::TRIVIAL_COPY);
        if !<T as FlatSerializable>::TRIVIAL_COPY {
            return len_of_iterable::<T, _, _>(self.iter(), count);
        }
        ::std::mem::size_of::<T>() * count
    }
}

unsafe impl<'i, T: 'i> VariableLen<'i> for Slice<'i, T>
where
    T: FlatSerializable<'i> + Clone,
{
    #[inline(always)]
    unsafe fn try_ref(input: &'i [u8], count: usize) -> Result<(Self, &'i [u8]), WrapErr> {
        if T::TRIVIAL_COPY {
            let (field, rem) = <&[T]>::try_ref(input, count)?;
            return Ok((Self::Slice(field), rem));
        }
        // variable-size elements: walk the buffer once to find the total
        // (padding-inclusive) byte length of `count` elements
        let mut total_len = 0;
        let mut tmp = input;
        let mut old_ptr = input.as_ptr() as usize;
        for _ in 0..count {
            let (field, rem) = T::try_ref(tmp)?;
            debug_assert_eq!(rem.as_ptr() as usize - old_ptr, field.num_bytes());
            let additional_len = aligning_len(rem.as_ptr() as _, T::REQUIRED_ALIGNMENT);
            if rem.len() < additional_len {
                return Err(WrapErr::NotEnoughBytes(additional_len));
            }
            let rem = &rem[additional_len..];
            debug_assert_eq!(rem.as_ptr() as usize % T::REQUIRED_ALIGNMENT, 0);
            let padded_len = rem.as_ptr() as usize - old_ptr;
            old_ptr = rem.as_ptr() as usize;
            tmp = rem;
            total_len += padded_len;
        }
        let (iter, rem) = input.split_at(total_len);
        debug_assert_eq!(rem.as_ptr() as usize, tmp.as_ptr() as usize);
        debug_assert_eq!(rem.len(), tmp.len());
        Ok((Self::from_bytes(iter), rem))
    }

    #[inline(always)]
    unsafe fn fill_slice<'out>(
        &self,
        count: usize,
        input: &'out mut [MaybeUninit<u8>],
    ) -> &'out mut [MaybeUninit<u8>] {
        if let (true, Self::Slice(values)) = (T::TRIVIAL_COPY, self) {
            return <&[T]>::fill_slice(values, count, input);
        }
        fill_slice_from_iter(self.iter(), count, input)
    }

    #[inline(always)]
    fn num_bytes(&self, count: usize) -> usize {
        if let (true, Self::Slice(values)) = (T::TRIVIAL_COPY, self) {
            return <&[T]>::num_bytes(values, count);
        }
        len_of_iterable(self.iter(), count)
    }
}

// Serialize `count` elements from `iter`, zero-filling inter-element padding.
// Panics if the iterator yields fewer than `count` elements.
#[inline(always)]
unsafe fn fill_slice_from_iter<
    'i,
    'out,
    T: FlatSerializable<'i>,
    V: ValOrRef<T>,
    I: Iterator<Item = V>,
>(
    iter: I,
    count: usize,
    mut input: &'out mut [MaybeUninit<u8>],
) -> &'out mut [MaybeUninit<u8>] {
    let mut filled = 0;
    for v in iter.take(count) {
        input = v.to_ref().fill_slice(input);
        let additional_len = aligning_len(input.as_ptr(), T::REQUIRED_ALIGNMENT);
        let (addition, rem) = input.split_at_mut(additional_len);
        // padding is at most align-1 < 8 bytes, so an 8-byte zero source suffices
        addition.copy_from_slice(&[MaybeUninit::new(0); 8][..additional_len]);
        debug_assert_eq!(rem.as_ptr() as usize % T::REQUIRED_ALIGNMENT, 0);
        input = rem;
        filled += 1;
    }
    if filled < count {
        panic!("Not enough elements. Expected {count} found {filled}")
    }
    input
}

// Byte length (padding included) of `count` elements from `iter`.
// Panics if the iterator yields fewer than `count` elements.
#[inline(always)]
fn len_of_iterable<'i, T: FlatSerializable<'i>, V: ValOrRef<T>, I: Iterator<Item = V>>(
    iter: I,
    count: usize,
) -> usize {
    let mut filled = 0;
    let mut len = 0;
    for v in iter.take(count) {
        filled += 1;
        len += v.to_ref().num_bytes();
        if len % T::REQUIRED_ALIGNMENT != 0 {
            len += T::REQUIRED_ALIGNMENT - (len % T::REQUIRED_ALIGNMENT);
        }
    }
    if filled < count {
        panic!("Not enough elements. Expected {count} found {filled}")
    }
    len
}

// Number of padding bytes needed to advance `ptr` to the next multiple of `align`.
#[inline(always)]
fn aligning_len(ptr: *const MaybeUninit<u8>, align: usize) -> usize {
    let current_ptr = ptr as usize;
    if current_ptr % align == 0 {
        return 0;
    }
    align - (current_ptr % align)
}

// Lets the fill/len helpers accept iterators yielding either `T` or `&T`.
trait ValOrRef<T> {
    fn to_ref(&self) -> &T;
}

impl<T> ValOrRef<T> for T {
    fn to_ref(&self) -> &T {
        self
    }
}

impl<T> ValOrRef<T> for &T {
    fn to_ref(&self) -> &T {
        self
    }
}

#[cfg(test)]
mod tests {
    use crate as flat_serialize;
    use flat_serialize_macro::{flat_serialize, FlatSerializable};

    flat_serialize!
{ #[derive(Debug)] struct Basic<'input> { header: u64, data_len: u32, array: [u16; 3], data: [u8; self.data_len], data2: [[u8; 2]; self.data_len / 3], } } #[test] fn basic() { use crate::{FlatSerializable, Slice, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&33u64.to_ne_bytes()); bytes.extend_from_slice(&6u32.to_ne_bytes()); bytes.extend_from_slice(&202u16.to_ne_bytes()); bytes.extend_from_slice(&404u16.to_ne_bytes()); bytes.extend_from_slice(&555u16.to_ne_bytes()); bytes.extend_from_slice(&[1, 3, 5, 7, 9, 11]); bytes.extend_from_slice(&[4, 4, 95, 99]); let ( Basic { header, data_len, data, data2, array, }, rem, ) = unsafe { Basic::try_ref(&bytes).unwrap() }; assert_eq!( (header, data_len, array, &data, &data2, rem), ( 33, 6, [202, 404, 555], &Slice::Slice(&[1, 3, 5, 7, 9, 11][..]), &Slice::Slice(&[[4, 4], [95, 99]]), &[][..] ) ); let mut output = vec![]; Basic { header, data_len, data: data.clone(), data2: data2.clone(), array, } .fill_vec(&mut output); assert_eq!(output, bytes); let debug = format!( "{:?}", Basic { header, data_len, data, data2, array } ); assert_eq!(debug, "Basic { header: 33, data_len: 6, array: [202, 404, 555], data: [1, 3, 5, 7, 9, 11], data2: [[4, 4], [95, 99]] }"); assert_eq!(Basic::MIN_LEN, 18); assert_eq!(Basic::REQUIRED_ALIGNMENT, 8); assert_eq!(Basic::MAX_PROVIDED_ALIGNMENT, Some(1)); assert_eq!(Basic::TRIVIAL_COPY, false); for i in 0..bytes.len() - 1 { let res = unsafe { Basic::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } #[test] #[should_panic(expected = "range end index 5 out of range for slice of length 1")] fn bad_len1() { use crate::{FlatSerializable, Slice}; let mut output = vec![]; Basic { header: 1, data_len: 5, array: [0; 3], data: Slice::Slice(&[1]), data2: Slice::Slice(&[[2, 2]]), } .fill_vec(&mut output); } #[test] #[should_panic(expected = "range end index 1 out of range for slice of length 0")] fn bad_len2() { use crate::{FlatSerializable, Slice}; let 
mut output = vec![]; Basic { header: 1, data_len: 5, array: [0; 3], data: Slice::Slice(&[1, 2, 3, 4, 5]), data2: Slice::Slice(&[]), } .fill_vec(&mut output); } flat_serialize! { #[derive(Debug, PartialEq, Eq)] struct Optional { header: u64, optional_field: u32 if self.header != 1, non_optional_field: u16, } } const _TEST_NO_VARIABLE_LEN_NO_LIFETIME: Optional = Optional { header: 0, optional_field: None, non_optional_field: 0, }; #[test] fn optional_present() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&101010101u64.to_ne_bytes()); bytes.extend_from_slice(&30u32.to_ne_bytes()); bytes.extend_from_slice(&6u16.to_ne_bytes()); let ( Optional { header, optional_field, non_optional_field, }, rem, ) = unsafe { Optional::try_ref(&bytes).unwrap() }; assert_eq!( (header, optional_field, non_optional_field, rem), (101010101, Some(30), 6, &[][..]) ); let mut output = vec![]; Optional { header, optional_field, non_optional_field, } .fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { Optional::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } assert_eq!(Optional::MIN_LEN, 10); assert_eq!(Optional::REQUIRED_ALIGNMENT, 8); assert_eq!(Optional::MAX_PROVIDED_ALIGNMENT, Some(2)); assert_eq!(Optional::TRIVIAL_COPY, false); } #[test] fn optional_absent() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&1u64.to_ne_bytes()); bytes.extend_from_slice(&7u16.to_ne_bytes()); let ( Optional { header, optional_field, non_optional_field, }, rem, ) = unsafe { Optional::try_ref(&bytes).unwrap() }; assert_eq!( (header, optional_field, non_optional_field, rem), (1, None, 7, &[][..]) ); let mut output = vec![]; Optional { header, optional_field, non_optional_field, } .fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { Optional::try_ref(&bytes[..i]) }; 
assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } flat_serialize! { #[derive(Debug)] struct Nested<'a> { prefix: u64, basic: Basic<'a>, } } #[test] fn nested() { use crate::{FlatSerializable, Slice, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&101010101u64.to_ne_bytes()); bytes.extend_from_slice(&33u64.to_ne_bytes()); bytes.extend_from_slice(&6u32.to_ne_bytes()); bytes.extend_from_slice(&202u16.to_ne_bytes()); bytes.extend_from_slice(&404u16.to_ne_bytes()); bytes.extend_from_slice(&555u16.to_ne_bytes()); bytes.extend_from_slice(&[1, 3, 5, 7, 9, 11]); bytes.extend_from_slice(&[3, 0, 104, 2]); let ( Nested { prefix, basic: Basic { header, data_len, array, data, data2, }, }, rem, ) = unsafe { Nested::try_ref(&bytes).unwrap() }; assert_eq!( (prefix, header, data_len, array, &data, &data2, rem), ( 101010101, 33, 6, [202, 404, 555], &Slice::Slice(&[1, 3, 5, 7, 9, 11][..]), &Slice::Slice(&[[3, 0], [104, 2]]), &[][..] ) ); let mut output = vec![]; Nested { prefix, basic: Basic { header, data_len, data, data2, array, }, } .fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { Nested::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } flat_serialize! 
{ #[derive(Debug)] struct NestedOptional { present: u64, val: Optional if self.present > 2, } } #[test] fn nested_optional() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&3u64.to_ne_bytes()); { bytes.extend_from_slice(&0u64.to_ne_bytes()); bytes.extend_from_slice(&111111111u32.to_ne_bytes()); bytes.extend_from_slice(&0xf00fu16.to_ne_bytes()); bytes.extend_from_slice(&[77; 2]); } let (NestedOptional { present, val }, rem) = unsafe { NestedOptional::try_ref(&bytes).unwrap() }; assert_eq!( (present, &val, rem), ( 3, &Some(Optional { header: 0, optional_field: Some(111111111), non_optional_field: 0xf00f, }), &[77; 2][..], ) ); let mut output = vec![]; NestedOptional { present, val }.fill_vec(&mut output); assert_eq!(output, &bytes[..bytes.len() - 2]); for i in 0..bytes.len() - 3 { let res = unsafe { NestedOptional::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } assert_eq!(NestedOptional::MIN_LEN, 8); assert_eq!(NestedOptional::REQUIRED_ALIGNMENT, 8); assert_eq!(NestedOptional::MAX_PROVIDED_ALIGNMENT, Some(2)); assert_eq!(NestedOptional::TRIVIAL_COPY, false); } flat_serialize! 
{ #[derive(Debug)] struct NestedSlice<'b> { num_vals: u64, // #[flat_serialize::flatten] vals: [Optional; self.num_vals], } } #[test] fn nested_slice() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&3u64.to_ne_bytes()); { bytes.extend_from_slice(&101010101u64.to_ne_bytes()); bytes.extend_from_slice(&30u32.to_ne_bytes()); bytes.extend_from_slice(&6u16.to_ne_bytes()); bytes.extend_from_slice(&[0; 2]); } { bytes.extend_from_slice(&1u64.to_ne_bytes()); bytes.extend_from_slice(&7u16.to_ne_bytes()); bytes.extend_from_slice(&[0; 6]); } { bytes.extend_from_slice(&0u64.to_ne_bytes()); bytes.extend_from_slice(&111111111u32.to_ne_bytes()); bytes.extend_from_slice(&0xf00fu16.to_ne_bytes()); bytes.extend_from_slice(&[0; 2]); } let (NestedSlice { num_vals, vals }, rem) = unsafe { NestedSlice::try_ref(&bytes).unwrap() }; let vals_vec: Vec<_> = vals.iter().collect(); assert_eq!( (num_vals, &*vals_vec, rem), ( 3, &[ Optional { header: 101010101, optional_field: Some(30), non_optional_field: 6, }, Optional { header: 1, optional_field: None, non_optional_field: 7, }, Optional { header: 0, optional_field: Some(111111111), non_optional_field: 0xf00f, }, ][..], &[][..], ) ); let mut output = vec![]; NestedSlice { num_vals, vals }.fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { NestedSlice::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } assert_eq!(NestedSlice::MIN_LEN, 8); assert_eq!(NestedSlice::REQUIRED_ALIGNMENT, 8); assert_eq!(NestedSlice::MAX_PROVIDED_ALIGNMENT, Some(8)); assert_eq!(NestedSlice::TRIVIAL_COPY, false); } flat_serialize! 
{ #[derive(Debug, PartialEq, Eq)] enum BasicEnum<'input> { k: u64, First: 2 { data_len: u32, data: [u8; self.data_len], }, Fixed: 3 { array: [u16; 3], }, } } #[test] fn basic_enum1() { use crate::{FlatSerializable, Slice, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&2u64.to_ne_bytes()); bytes.extend_from_slice(&6u32.to_ne_bytes()); bytes.extend_from_slice(&[1, 3, 5, 7, 9, 11]); let (data_len, data, rem) = match unsafe { BasicEnum::try_ref(&bytes).unwrap() } { (BasicEnum::First { data_len, data }, rem) => (data_len, data, rem), _ => unreachable!(), }; assert_eq!( (data_len, &data, rem), (6, &Slice::Slice(&[1, 3, 5, 7, 9, 11][..]), &[][..]) ); let mut output = vec![]; BasicEnum::First { data_len, data }.fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { BasicEnum::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } #[test] fn basic_enum2() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&3u64.to_ne_bytes()); bytes.extend_from_slice(&3u16.to_ne_bytes()); bytes.extend_from_slice(&6u16.to_ne_bytes()); bytes.extend_from_slice(&9u16.to_ne_bytes()); bytes.extend_from_slice(&[7]); let (array, rem) = match unsafe { BasicEnum::try_ref(&bytes).unwrap() } { (BasicEnum::Fixed { array }, rem) => (array, rem), _ => unreachable!(), }; assert_eq!((array, rem), ([3, 6, 9], &[7][..])); let (array, rem) = match unsafe { BasicEnum::try_ref(&bytes).unwrap() } { (BasicEnum::Fixed { array }, rem) => (array, rem), _ => unreachable!(), }; assert_eq!((array, rem), ([3, 6, 9], &[7][..])); let mut output = vec![]; BasicEnum::Fixed { array }.fill_vec(&mut output); assert_eq!(output, &bytes[..bytes.len() - 1]); for i in 0..bytes.len() - 1 { let res = unsafe { BasicEnum::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } flat_serialize! 
{ #[derive(Debug)] enum PaddedEnum<'input> { k: u8, First: 2 { padding: [u8; 3], data_len: u32, data: [u8; self.data_len], }, Fixed: 3 { padding: u8, array: [u16; 3], }, } } #[test] fn padded_enum1() { use crate::{FlatSerializable, Slice, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&2u8.to_ne_bytes()); bytes.extend_from_slice(&[0xf, 0xf, 0xf]); bytes.extend_from_slice(&6u32.to_ne_bytes()); bytes.extend_from_slice(&[1, 3, 5, 7, 9, 11]); let (padding, data_len, data, rem) = match unsafe { PaddedEnum::try_ref(&bytes).unwrap() } { ( PaddedEnum::First { padding, data_len, data, }, rem, ) => (padding, data_len, data, rem), _ => unreachable!(), }; assert_eq!( (padding, data_len, &data, rem), ( [0xf, 0xf, 0xf], 6, &Slice::Slice(&[1, 3, 5, 7, 9, 11][..]), &[][..] ) ); let mut output = vec![]; PaddedEnum::First { padding, data_len, data, } .fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { PaddedEnum::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } #[test] fn padded_enum2() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&3u8.to_ne_bytes()); bytes.extend_from_slice(&[0]); bytes.extend_from_slice(&3u16.to_ne_bytes()); bytes.extend_from_slice(&6u16.to_ne_bytes()); bytes.extend_from_slice(&9u16.to_ne_bytes()); bytes.extend_from_slice(&[7]); let (padding, array, rem) = match unsafe { PaddedEnum::try_ref(&bytes).unwrap() } { (PaddedEnum::Fixed { padding, array }, rem) => (padding, array, rem), _ => unreachable!(), }; assert_eq!((padding, array, rem), (0, [3, 6, 9], &[7][..])); let (padding, array, rem) = match unsafe { PaddedEnum::try_ref(&bytes).unwrap() } { (PaddedEnum::Fixed { padding, array }, rem) => (padding, array, rem), _ => unreachable!(), }; assert_eq!((padding, array, rem), (0, [3, 6, 9], &[7][..])); let mut output = vec![]; PaddedEnum::Fixed { padding, array }.fill_vec(&mut output); assert_eq!(output, 
&bytes[..bytes.len() - 1]); for i in 0..bytes.len() - 1 { let res = unsafe { PaddedEnum::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } flat_serialize! { #[derive(Debug)] struct ManyEnum<'input> { count: u64, enums: [BasicEnum<'input>; self.count], } } #[test] fn many_enum() { use crate::{FlatSerializable, Slice, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&4u64.to_ne_bytes()); { bytes.extend_from_slice(&2u64.to_ne_bytes()); bytes.extend_from_slice(&6u32.to_ne_bytes()); bytes.extend_from_slice(&[1, 3, 5, 7, 9, 11]); while bytes.len() % 8 != 0 { bytes.push(0) } } { bytes.extend_from_slice(&3u64.to_ne_bytes()); bytes.extend_from_slice(&3u16.to_ne_bytes()); bytes.extend_from_slice(&6u16.to_ne_bytes()); bytes.extend_from_slice(&9u16.to_ne_bytes()); while bytes.len() % 8 != 0 { bytes.push(0) } } { bytes.extend_from_slice(&2u64.to_ne_bytes()); bytes.extend_from_slice(&1u32.to_ne_bytes()); bytes.extend_from_slice(&[44u8]); while bytes.len() % 8 != 0 { bytes.push(0) } } { bytes.extend_from_slice(&2u64.to_ne_bytes()); bytes.extend_from_slice(&2u32.to_ne_bytes()); bytes.extend_from_slice(&[89u8, 123u8]); while bytes.len() % 8 != 0 { bytes.push(0) } } let (ManyEnum { count, enums }, rem) = unsafe { ManyEnum::try_ref(&bytes).unwrap() }; assert_eq!((count, rem), (4, &[][..])); let enums_vec: Vec<_> = enums.iter().collect(); assert_eq!( &*enums_vec, &[ BasicEnum::First { data_len: 6, data: Slice::Slice(&[1, 3, 5, 7, 9, 11]) }, BasicEnum::Fixed { array: [3, 6, 9] }, BasicEnum::First { data_len: 1, data: Slice::Slice(&[44u8]) }, BasicEnum::First { data_len: 2, data: Slice::Slice(&[89u8, 123]) }, ] ); let mut output = vec![]; ManyEnum { count, enums }.fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { ManyEnum::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } macro_rules! sub_macro { ( $(#[$attrs: meta])? 
struct $name: ident { $($field:ident : $typ: tt),* $(,)? } ) => { flat_serialize_macro::flat_serialize! { $(#[$attrs])? struct $name { $($field: $typ),* } } } } // test that sub_macros provide correct compilation sub_macro! { #[derive(Debug)] struct InMacro { a: u32, padding: [u8; 4], // with this commented out, the error should be on b b: f64, } } #[test] fn test_no_refrence() { flat_serialize! { struct NoLifetime { val: i64, } } let _: NoLifetime = NoLifetime { val: 3 }; flat_serialize! { struct NestedNoLifetime { nested: NoLifetime, } } let _: NestedNoLifetime = NestedNoLifetime { nested: NoLifetime { val: 3 }, }; flat_serialize! { enum ENoLifetime { tag: i64, Variant: 1 { val: i64, }, } } let _: ENoLifetime = ENoLifetime::Variant { val: 2 }; flat_serialize! { enum NestedENoLifetime { tag: i64, Variant: 2 { val: ENoLifetime, }, } } let _: NestedENoLifetime = NestedENoLifetime::Variant { val: ENoLifetime::Variant { val: 2 }, }; } macro_rules! check_size_align { (struct $($dec_life:lifetime)? { $( $(#[$attrs: meta])* $field:ident : $typ: tt $(<$life:lifetime>)?),* $(,)? } len: $min_len: expr, align: $required_alignment: expr, max: $max_provided_alignment: expr $(,)? ) => { { flat_serialize!{ struct SizeAlignTest $(<$dec_life>)? 
{ $($(#[$attrs])* $field: $typ $(<$life>)?),* } }; assert_eq!(::MIN_LEN, $min_len, "length"); assert_eq!(::REQUIRED_ALIGNMENT, $required_alignment, "required"); assert_eq!(::MAX_PROVIDED_ALIGNMENT, $max_provided_alignment, "max provided"); assert_eq!(::TRIVIAL_COPY, false, "trivial copy"); } } } #[test] fn test_size_align_struct() { check_size_align!( struct { f: u8, } len: 1, align: 1, max: None, ); check_size_align!( struct { f: u16, } len: 2, align: 2, max: None, ); check_size_align!( struct { f: u32, } len: 4, align: 4, max: None, ); check_size_align!( struct { f: u64, } len: 8, align: 8, max: None, ); check_size_align!( struct { a: u64, b: u32, c: u16, } len: 8 + 4 + 2, align: 8, max: None, ); check_size_align!( struct { a: u32, b: u32, c: u32, } len: 4 + 4 + 4, align: 4, max: None, ); check_size_align!( struct { a: [u32; 3], } len: 4 * 3, align: 4, max: None, ); check_size_align!( struct 'a { a: u32, b: [u16; self.a], } len: 4, align: 4, max: Some(2), ); check_size_align!( struct 'a { a: u32, b: [u32; self.a], } len: 4, align: 4, max: Some(4), ); check_size_align!( struct 'a { a: u32, b: [u32; self.a], c: u32, } len: 4 + 4, align: 4, max: Some(4), ); flat_serialize! { struct NestedA { a: u32, b: u16, } } check_size_align!( struct { a: u32, b: NestedA, } len: 4 + (4 + 2), align: 4, max: None, ); check_size_align!( struct { a: u64, b: NestedA, } len: 8 + (4 + 2), align: 8, max: None, ); check_size_align!( struct { a: u64, b: NestedA, c: u8 } len: 8 + (4 + 2) + 1, align: 8, max: None, ); check_size_align!( struct { a: NestedA, b: u8, c: u8, f: NestedA, } len: (4 + 2) + 1 + 1 + (4 + 2), align: 4, max: None, ); flat_serialize! 
{ struct NestedB<'input> { a: u32, b: [u16; self.a], } } check_size_align!( struct 'a { a: u32, b: NestedB<'a>, } len: 4 + (4), align: 4, max: Some(2), ); check_size_align!( struct 'a { a: u64, b: NestedB<'a>, } len: 8 + (4), align: 8, max: Some(2), ); check_size_align!( struct 'a { a: u64, b: NestedB<'a>, c: u8 } len: 8 + (4) + 1, align: 8, max: Some(1), ); check_size_align!( struct 'a { a: u8, b: u8, c: u8, d: u8, e: NestedB<'a>, } len: 4 + (4), align: 4, max: Some(2), ); } #[test] fn test_size_align_enum() { flat_serialize! { enum EnumA { tag: u32, A: 1 { a: u32, }, B: 2 { a: u16, }, } } check_size_align!( struct { a: EnumA, } len: (4 + 2), align: 4, max: Some(2), ); check_size_align!( struct { a: EnumA, b: u16, } len: (4 + 2) + 2, align: 4, max: Some(2), ); check_size_align!( struct { b: u64, a: EnumA, } len: 8 + (4 + 2), align: 8, max: Some(2), ); flat_serialize! { enum EnumB { tag: u32, A: 1 { a: [u8; 5], }, B: 2 { a: u16, }, } } check_size_align!( struct { a: EnumB, } len: (4 + 2), align: 4, max: Some(1), ); check_size_align!( struct { b: u64, a: EnumB, } len: 8 + (4 + 2), align: 8, max: Some(1), ); flat_serialize! { enum EnumC<'input> { tag: u64, A: 1 { a: u64, }, B: 2 { a: u16, b: [u16; self.a], }, } } check_size_align!( struct 'a { a: EnumC<'a>, } len: (8 + 2), align: 8, max: Some(2), ); check_size_align!( struct 'a { a: EnumC<'a>, b: u16, } len: (8 + 2) + 2, align: 8, max: Some(2), ); check_size_align!( struct 'a { b: u64, a: EnumC<'a>, } len: 8 + (8 + 2), align: 8, max: Some(2), ); flat_serialize! 
{
    enum EnumD<'input> {
        tag: u32,
        A: 1 {
            a: u16,
        },
        B: 2 {
            a: u32,
            b: [u8; self.a],
        },
    }
}
check_size_align!(
    struct 'a { a: EnumD<'a>, }
    len: (4 + 2), align: 4, max: Some(1),
);
check_size_align!(
    struct 'a { a: EnumD<'a>, b: u8, }
    len: (4 + 2) + 1, align: 4, max: Some(1),
);
check_size_align!(
    struct 'a { b: u64, a: EnumD<'a>, }
    len: 8 + (4 + 2), align: 8, max: Some(1),
);
}

// `#[derive(FlatSerializable)]` on a `#[repr(C)]` struct of fixed-size
// fields; the asserts in `fn foo` below show such a type is TRIVIAL_COPY.
#[derive(FlatSerializable)]
#[allow(dead_code)]
#[derive(Debug)]
#[repr(C)]
struct Foo {
    a: i32,
    b: i32,
}

// Compile-time check that the derive emitted `FlatSerializable` impls for
// `Foo` and for arrays of `Foo`.
// NOTE(review): the turbofish argument had been lost in transit
// (`check_flat_serializable_impl::;`), which cannot compile; restored to
// `::<Foo>` by analogy with the surviving `::<[Foo; 2]>` line.
const _: () = {
    fn check_flat_serializable_impl<'a, T: crate::FlatSerializable<'a>>() {}
    let _ = check_flat_serializable_impl::<Foo>;
    let _ = check_flat_serializable_impl::<[Foo; 2]>;
};

// Round-trip a derived struct through try_ref/fill_vec, check the associated
// constants, and verify every truncated prefix fails with NotEnoughBytes.
#[test]
fn foo() {
    use crate::{FlatSerializable, WrapErr};

    let mut bytes = Vec::new();
    bytes.extend_from_slice(&33i32.to_ne_bytes());
    bytes.extend_from_slice(&100000001i32.to_ne_bytes());
    let (Foo { a, b }, rem) = unsafe { Foo::try_ref(&bytes).unwrap() };
    assert_eq!((a, b, rem), (33, 100000001, &[][..]),);

    let mut output = vec![];
    Foo { a, b }.fill_vec(&mut output);
    assert_eq!(output, bytes);

    assert_eq!(Foo::MIN_LEN, 8);
    assert_eq!(Foo::REQUIRED_ALIGNMENT, 4);
    assert_eq!(Foo::MAX_PROVIDED_ALIGNMENT, None);
    assert_eq!(Foo::TRIVIAL_COPY, true);

    for i in 0..bytes.len() - 1 {
        let res = unsafe { Foo::try_ref(&bytes[..i]) };
        assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res);
    }
}

// `#[derive(FlatSerializable)]` on a fieldless enum with an explicit repr:
// serialized as the repr type (u16 here); see fs_enum_* tests below.
#[derive(FlatSerializable)]
#[allow(dead_code)]
#[repr(u16)]
#[derive(Debug, Copy, Clone)]
enum Bar {
    A = 0,
    B = 1111,
}

// Same compile-time impl check as for `Foo` (restored `::<Bar>` turbofish).
const _: () = {
    fn check_flat_serializable_impl<'a, T: crate::FlatSerializable<'a>>() {}
    let _ = check_flat_serializable_impl::<Bar>;
    let _ = check_flat_serializable_impl::<[Bar; 2]>;
};

// Deserialize the first discriminant and round-trip it back to bytes.
#[test]
fn fs_enum_a() {
    use crate::{FlatSerializable, WrapErr};

    let mut bytes = Vec::new();
    bytes.extend_from_slice(&0u16.to_ne_bytes());
    let (val, rem) = unsafe { Bar::try_ref(&bytes).unwrap() };
    assert_eq!((val as u16, rem), (Bar::A as u16, &[][..]));

    let mut output = vec![];
    val.fill_vec(&mut output);
    assert_eq!(output,
bytes); assert_eq!(Bar::MIN_LEN, 2); assert_eq!(Bar::REQUIRED_ALIGNMENT, 2); assert_eq!(Bar::MAX_PROVIDED_ALIGNMENT, None); assert_eq!(Bar::TRIVIAL_COPY, true); for i in 0..bytes.len() - 1 { let res = unsafe { Bar::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } #[test] fn fs_enum_b() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&1111u16.to_ne_bytes()); let (val, rem) = unsafe { Bar::try_ref(&bytes).unwrap() }; assert_eq!((val as u16, rem), (Bar::B as u16, &[][..])); let mut output = vec![]; val.fill_vec(&mut output); assert_eq!(output, bytes); for i in 0..bytes.len() - 1 { let res = unsafe { Bar::try_ref(&bytes[..i]) }; assert!(matches!(res, Err(WrapErr::NotEnoughBytes(..))), "{:?}", res); } } #[test] fn fs_enum_non() { use crate::{FlatSerializable, WrapErr}; let mut bytes = Vec::new(); bytes.extend_from_slice(&1u16.to_ne_bytes()); let res = unsafe { Bar::try_ref(&bytes) }; assert!(matches!(res, Err(WrapErr::InvalidTag(0)))); } } ================================================ FILE: crates/flat_serialize/flat_serialize_macro/Cargo.toml ================================================ [package] name = "flat_serialize_macro" version = "0.1.0" authors = ["Joshua Lockerman"] edition = "2021" [lib] proc-macro = true [dependencies] syn = {version="1.0", features=["extra-traits", "visit", "visit-mut", "full"]} quote = "1.0" proc-macro2 = "1.0" [features] default = [] print-generated = [] ================================================ FILE: crates/flat_serialize/flat_serialize_macro/src/lib.rs ================================================ use proc_macro::TokenStream; use proc_macro2::TokenStream as TokenStream2; use quote::{quote, quote_spanned}; use syn::{ parse_macro_input, punctuated::Punctuated, spanned::Spanned, visit_mut::VisitMut, Attribute, Expr, Field, Ident, Lifetime, Token, }; mod parser; #[proc_macro] pub fn flat_serialize(input: TokenStream) -> 
TokenStream {
    // Parse the input tokens into a syntax tree
    let input = parse_macro_input!(input as FlatSerialize);

    let expanded = match input {
        FlatSerialize::Struct(input) => flat_serialize_struct(input),
        FlatSerialize::Enum(input) => flat_serialize_enum(input),
    };

    if cfg!(feature = "print-generated") {
        println!("{expanded}");
    }

    expanded.into()
}

#[allow(clippy::large_enum_variant)] // only one of these are created, and it's on the stack
enum FlatSerialize {
    Enum(FlatSerializeEnum),
    Struct(FlatSerializeStruct),
}

/// a `flat_serialize`d enum e.g.
/// ```skip
/// flat_serialize! {
///     enum BasicEnum {
///         k: u8,
///         First: 2 {
///             data_len: usize,
///             data: [u8; self.data_len],
///         },
///         Fixed: 3 {
///             array: [u16; 3],
///         },
///     }
/// }
/// ```
/// the body of the enum variants must be a valid FlatSerializeStruct body
struct FlatSerializeEnum {
    // NOTE(review): the generic arguments of the collection/option fields in
    // this and the following structs had been stripped in transit
    // (`Vec,`/`Option,`/`Punctuated,` cannot compile); restored from how the
    // fields are used by the codegen below.
    per_field_attrs: Vec<PerFieldsAttr>,
    attrs: Vec<Attribute>,
    ident: Ident,
    lifetime: Option<Lifetime>,
    tag: FlatSerializeField,
    variants: Punctuated<FlatSerializeVariant, Token![,]>,
}

struct FlatSerializeVariant {
    tag_val: Expr,
    body: FlatSerializeStruct,
}

/// a `flat_serialize`d struct e.g.
/// ```skip
/// flat_serialize! {
///     #[derive(Debug)]
///     struct Basic {
///         header: u64,
///         data_len: u32,
///         array: [u16; 3],
///         data: [u8; self.data_len],
///         data2: [u8; self.data_len / 2],
///     }
/// }
/// ```
/// the syntax is the same as a regular struct, except that it allows
/// `self` expressions in the length of arrays; these will be represented as
/// variable-length fields. We also interpret
/// `#[flat_serialize::field_attr(fixed = "#[foo]", variable = "#[bar]"))]` as
/// applying the attribute `#[foo]` to every fixed-length field of the struct,
/// and `#[bar]` to every variable-length field. e.g.
/// ```skip
/// flat_serialize! {
///     #[flat_serialize::field_attr(fixed = "#[foo]", variable = "#[bar]"))]`
///     struct Struct {
///         a: i32,
///         b: i32,
///         c: [u16; self.a]
///         d: [u8; self.a]
///     }
/// ```
/// is equivalent to
/// ```skip
/// flat_serialize! {
///     struct Struct {
///         #[foo]
///         a: i32,
///         #[foo]
///         b: i32,
///         #[bar]
///         c: [u16; self.a]
///         #[bar]
///         d: [u8; self.a]
///     }
/// ```
/// This can be useful when generating flat_serialize structs from a macro
struct FlatSerializeStruct {
    per_field_attrs: Vec<PerFieldsAttr>,
    attrs: Vec<Attribute>,
    ident: Ident,
    lifetime: Option<Lifetime>,
    fields: Punctuated<FlatSerializeField, Token![,]>,
}

struct FlatSerializeField {
    field: Field,
    // pre-rendered tokens for the field's type with its lifetime erased;
    // None when the type has no lifetime to erase
    ty_without_lifetime: Option<TokenStream2>,
    // TODO is this mutually exclusive with `flatten` above? Should we make an
    // enum to select between them?
    length_info: Option<VariableLenFieldInfo>,
}

/// a `#[flat_serialize::field_attr(fixed = "#[foo]", variable = "#[bar]"))]`
/// attribute. The inner attribute(s) will be applied to each relevant field.
struct PerFieldsAttr {
    fixed: Attribute,
    variable: Option<Attribute>,
}

/// how to find the length of a variable-length or optional field.
struct VariableLenFieldInfo {
    ty: syn::Type,
    ty_without_lifetime: Option<TokenStream2>,
    len_expr: syn::Expr,
    // is an optional field instead of a general varlen field, len_expr should
    // eval to a boolean
    is_optional: bool,
}

#[allow(clippy::redundant_clone)] // triggers incorrectly
fn flat_serialize_struct(input: FlatSerializeStruct) -> TokenStream2 {
    let ident = input.ident.clone();
    let ref_def = {
        let alignment_check = input.alignment_check(quote!(0), quote!(8));
        let trait_check = input.fn_trait_check();
        let required_alignment = input.fn_required_alignment();
        let max_provided_alignment = input.fn_max_provided_alignment();
        let min_len = input.fn_min_len();
        // if we ever want to force #[repr(C)] we can use this code to derive
        // TRIVIAL_COPY from the struct fields
        let _const_len = input.fields.iter().map(|f| {
            if f.length_info.is_some() {
                quote!(false)
            } else {
                let ty = &f.ty;
                quote!( <#ty as flat_serialize::FlatSerializable>::TRIVIAL_COPY )
            }
        });
        let lifetime = input.lifetime.as_ref().map(|lifetime| {
            quote!
{ #lifetime } }); let try_ref = input.fn_try_ref(lifetime.as_ref()); let fill_slice = input.fn_fill_slice(); let len = input.fn_len(); let field_names = input.fields.iter().map(|f| &f.ident); let field_names1 = field_names.clone(); let make_owned = input.fields.iter().map(|f| f.make_owned()); let into_owned = input.fields.iter().map(|f| f.into_owned()); let fields = input .fields .iter() .map(|f| f.declaration(true, lifetime.as_ref(), input.per_field_attrs.iter())); let lifetime_args = input.lifetime.as_ref().map(|lifetime| { quote! { <#lifetime> } }); let ref_liftime = lifetime_args.clone().unwrap_or_else(|| quote! { <'a> }); let rl = lifetime.clone().unwrap_or_else(|| quote! { 'a }); let owned_lifetime = if lifetime_args.is_some() { Some(quote!( <'static> )) } else { None }; let attrs = &*input.attrs; quote! { #[derive(Clone)] #(#attrs)* pub struct #ident #lifetime_args { #(#fields)* } // alignment assertions #[allow(unused_assignments)] const _: () = #alignment_check; #trait_check unsafe impl #ref_liftime flat_serialize::FlatSerializable #ref_liftime for #ident #lifetime_args { #required_alignment #max_provided_alignment #min_len // cannot be TRIVIAL_COPY unless the struct is #[repr(C)] const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Slice<#rl, #ident #lifetime_args>; type OWNED = #ident #owned_lifetime; #try_ref #fill_slice #len fn make_owned(&mut self) { let Self { #(#field_names,)* } = self; #(#make_owned)* } fn into_owned(self) -> Self::OWNED { let Self { #(#field_names1,)* } = self; Self::OWNED { #(#into_owned)* } } } } }; let expanded = quote! 
{ #ref_def }; expanded } fn flat_serialize_enum(input: FlatSerializeEnum) -> TokenStream2 { let alignment_check = input.alignment_check(); let uniqueness_check = input.uniqueness_check(); let trait_check = input.fn_trait_check(); let required_alignment = input.fn_required_alignment(); let max_provided_alignment = input.fn_max_provided_alignment(); let min_len = input.fn_min_len(); let make_owned = input.variants.iter().map(|v| { let variant = &v.body.ident; let fields = v.body.fields.iter().map(|f| &f.ident); let make = v.body.fields.iter().map(|f| f.make_owned()); quote! { Self::#variant { #(#fields,)* } => { #(#make)* }, } }); let into_owned = input.variants.iter().map(|v| { let variant = &v.body.ident; let fields = v.body.fields.iter().map(|f| &f.ident); let into = v.body.fields.iter().map(|f| f.into_owned()); quote! { Self::#variant { #(#fields,)* } => Self::OWNED::#variant { #(#into)* }, } }); let lifetime = input.lifetime.as_ref().map(|lifetime| quote! { #lifetime }); let lifetime_args = input .lifetime .as_ref() .map(|lifetime| quote! { <#lifetime> }); let ref_liftime = lifetime_args.clone().unwrap_or_else(|| quote! { <'a> }); let rl = lifetime.clone().unwrap_or_else(|| quote! { 'a }); let owned_lifetime = if lifetime_args.is_some() { Some(quote!( <'static> )) } else { None }; let try_ref = input.fn_try_ref(lifetime.as_ref()); let fill_slice = input.fn_fill_slice(); let len = input.fn_len(); let body = input.variants(lifetime.as_ref()); let ident = &input.ident; let attrs = &*input.attrs; quote! 
{ #[derive(Clone)] #(#attrs)* #body #alignment_check #uniqueness_check #trait_check unsafe impl #ref_liftime flat_serialize::FlatSerializable #ref_liftime for #ident #lifetime_args { #required_alignment #max_provided_alignment #min_len // cannot be TRIVIAL_COPY since the rust enum layout is unspecified const TRIVIAL_COPY: bool = false; type SLICE = flat_serialize::Slice<#rl, #ident #lifetime_args>; type OWNED = #ident #owned_lifetime; #try_ref #fill_slice #len fn make_owned(&mut self) { match self { #(#make_owned)* } } fn into_owned(self) -> Self::OWNED { match self { #(#into_owned)* } } } } } impl VariableLenFieldInfo { fn len_from_bytes(&self) -> TokenStream2 { let mut lfb = SelfReplacer(|name| syn::parse_quote! { #name.clone().unwrap() }); let mut len = self.len_expr.clone(); lfb.visit_expr_mut(&mut len); quote! { #len } } fn counter_expr(&self) -> TokenStream2 { let mut ce = SelfReplacer(|name| syn::parse_quote! { (*#name) }); let mut len = self.len_expr.clone(); ce.visit_expr_mut(&mut len); quote! { #len } } fn err_size_expr(&self) -> TokenStream2 { let mut ese = SelfReplacer(|name| { syn::parse_quote! { match #name { Some(#name) => #name, None => return 0usize, } } }); let mut len = self.len_expr.clone(); ese.visit_expr_mut(&mut len); quote! 
{ #len }
    }
}

/// Rewrites `self.<field>` expressions inside a user-supplied length
/// expression, replacing each one with whatever tokens the callback returns
/// for that field name (used by len_from_bytes/counter_expr/err_size_expr
/// above).
// NOTE(review): the generic parameter had been stripped in transit
// (`struct SelfReplacer syn::Expr>(F)` cannot compile); restored to
// `<F: FnMut(&Ident) -> syn::Expr>` to match the closures the callers pass.
struct SelfReplacer<F: FnMut(&Ident) -> syn::Expr>(F);

impl<F: FnMut(&Ident) -> syn::Expr> VisitMut for SelfReplacer<F> {
    fn visit_expr_mut(&mut self, expr: &mut syn::Expr) {
        // Only rewrite field accesses rooted at `self`; recurse otherwise.
        if let syn::Expr::Field(field) = expr {
            if let syn::Expr::Path(path) = &mut *field.base {
                if path.path.segments[0].ident == "self" {
                    let name = match &field.member {
                        syn::Member::Named(name) => name,
                        syn::Member::Unnamed(_) => panic!("unnamed fields not supported"),
                    };
                    *expr = self.0(name)
                }
            }
        } else {
            syn::visit_mut::visit_expr_mut(self, expr)
        }
    }
}

// The pieces of a generated `try_ref` body, kept separate so struct and enum
// codegen can assemble them differently.
struct TryRefBody {
    vars: TokenStream2,
    body: TokenStream2,
    set_fields: TokenStream2,
    err_size: TokenStream2,
}

impl FlatSerializeEnum {
    // Generate the public enum declaration itself.
    fn variants(&self, lifetime: Option<&TokenStream2>) -> TokenStream2 {
        let id = &self.ident;
        let variants = self.variants.iter().map(|variant| {
            let fields = variant
                .body
                .fields
                .iter()
                .map(|f| f.declaration(false, lifetime, self.per_field_attrs.iter()));
            let ident = &variant.body.ident;
            quote! {
                #ident {
                    #(#fields)*
                },
            }
        });
        let args = lifetime.map(|lifetime| quote! { <#lifetime> });
        quote! {
            pub enum #id #args {
                #(#variants)*
            }
        }
    }

    // Tag values must be unique; lowering them to discriminants of a
    // throwaway enum makes rustc enforce that at compile time.
    fn uniqueness_check(&self) -> TokenStream2 {
        let variants = self.variants.iter().map(|variant| {
            let ident = &variant.body.ident;
            let tag_val = &variant.tag_val;
            quote! {
                #ident = #tag_val,
            }
        });
        quote! {
            // uniqueness check
            const _: () = {
                #[allow(dead_code)]
                enum UniquenessCheck {
                    #(#variants)*
                }
            };
        }
    }

    // Emit the compile-time alignment assertions: the tag first, then each
    // variant's fields laid out after it.
    fn alignment_check(&self) -> TokenStream2 {
        let tag_check = self.tag.alignment_check();
        let variant_checks = self.variants.iter().map(|v| {
            v.body
                .alignment_check(quote!(current_size), quote!(min_align))
        });
        quote! {
            // alignment assertions
            #[allow(unused_assignments)]
            const _: () = {
                use std::mem::{align_of, size_of};
                let mut current_size = 0;
                let mut min_align = 8;
                #tag_check
                #(#variant_checks)*
            };
        }
    }

    fn fn_trait_check(&self) -> TokenStream2 {
        let tag_check = self.tag.trait_check();
        let checks = self.variants.iter().map(|v| v.body.fn_trait_check());
        quote!
{ const _: () = { #tag_check #( const _: () = { #checks }; )* }; } } fn fn_required_alignment(&self) -> TokenStream2 { let tag_alignment = self.tag.required_alignment(); let alignments = self.variants.iter().map(|v| { let alignments = v.body.fields.iter().map(|f| f.required_alignment()); quote! { let mut required_alignment = #tag_alignment; #( let alignment = #alignments; if alignment > required_alignment { required_alignment = alignment; } )* required_alignment } }); quote! { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment: usize = #tag_alignment; #( let alignment: usize = { #alignments }; if alignment > required_alignment { required_alignment = alignment; } )* required_alignment }; } } fn fn_max_provided_alignment(&self) -> TokenStream2 { let min_align = self.tag.max_provided_alignment(); let min_align = quote! { match #min_align { Some(a) => Some(a), None => Some(8), } }; let min_size = self.tag.min_len(); let alignments = self.variants.iter().map(|v| { let alignments = v.body.fields.iter().map(|f| f.max_provided_alignment()); let sizes = v.body.fields.iter().map(|f| f.min_len()); quote! { let mut min_align: Option = #min_align; #( let alignment = #alignments; match (alignment, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } )* let variant_size: usize = #min_size #(+ #sizes)*; let effective_alignment = match min_align { Some(align) => align, None => 8, }; if variant_size % 8 == 0 && effective_alignment >= 8 { 8 } else if variant_size % 4 == 0 && effective_alignment >= 4 { 4 } else if variant_size % 2 == 0 && effective_alignment >= 2 { 2 } else { 1 } } }); quote! 
{ const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::{align_of, size_of}; let mut min_align: usize = match #min_align { None => 8, Some(align) => align, }; #( let variant_alignment: usize = { #alignments }; if variant_alignment < min_align { min_align = variant_alignment } )* let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } }; } } fn fn_min_len(&self) -> TokenStream2 { let tag_size = self.tag.min_len(); let sizes = self.variants.iter().map(|v| { let sizes = v.body.fields.iter().map(|f| f.min_len()); quote! { let mut size: usize = #tag_size; #(size += #sizes;)* size } }); quote! { const MIN_LEN: usize = { use std::mem::size_of; let mut size: Option = None; #( let variant_size = { #sizes }; size = match size { None => Some(variant_size), Some(size) if size > variant_size => Some(variant_size), Some(size) => Some(size), }; )* match size { Some(size) => size, None => #tag_size, } }; } } fn fn_try_ref(&self, lifetime: Option<&TokenStream2>) -> TokenStream2 { let break_label = syn::Lifetime::new("'tryref_tag", proc_macro2::Span::call_site()); let try_wrap_tag = self.tag.try_wrap(&break_label); let id = &self.ident; let tag_ty = &self.tag.ty; let bodies = self.variants.iter().enumerate().map(|(i, v)| { let tag_val = &v.tag_val; let variant = &v.body.ident; let break_label = syn::Lifetime::new(&format!("'tryref_{i}"), proc_macro2::Span::call_site()); let TryRefBody { vars, body, set_fields, err_size, } = v .body .fn_try_ref_body(&break_label); quote! { Some(#tag_val) => { #vars #break_label: loop { #body let _ref = #id::#variant { #set_fields }; return Ok((_ref, input)) } return Err(flat_serialize::WrapErr::NotEnoughBytes(std::mem::size_of::<#tag_ty>() #err_size)) } } }); let tag_ident = self.tag.ident.as_ref().unwrap(); quote! 
{ #[allow(unused_assignments, unused_variables, unreachable_code)] #[inline(always)] unsafe fn try_ref(mut input: & #lifetime [u8]) -> Result<(Self, & #lifetime [u8]), flat_serialize::WrapErr> { let __packet_macro_read_len = 0usize; let mut #tag_ident = None; 'tryref_tag: loop { #try_wrap_tag; match #tag_ident { #(#bodies),* _ => return Err(flat_serialize::WrapErr::InvalidTag(0)), } } //TODO Err(flat_serialize::WrapErr::NotEnoughBytes(::std::mem::size_of::<#tag_ty>())) } } } fn fn_fill_slice(&self) -> TokenStream2 { let tag_ty = &self.tag.ty; let tag_ident = self.tag.ident.as_ref().unwrap(); let fill_slice_tag = self.tag.fill_slice(); let id = &self.ident; let bodies = self.variants.iter().map(|v| { let tag_val = &v.tag_val; let variant = &v.body.ident; let (fields, fill_slice_with) = v.body.fill_slice_body(); quote! { #id::#variant { #fields } => { let #tag_ident: &#tag_ty = &#tag_val; #fill_slice_tag #fill_slice_with } } }); quote! { #[allow(unused_assignments, unused_variables)] unsafe fn fill_slice<'out>(&self, input: &'out mut [std::mem::MaybeUninit]) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.num_bytes(); let (mut input, rem) = input.split_at_mut(total_len); match self { #(#bodies),* } debug_assert_eq!(input.len(), 0); rem } } } fn fn_len(&self) -> TokenStream2 { let tag_ty = &self.tag.ty; let tag_size = quote! { ::std::mem::size_of::<#tag_ty>() }; let id = &self.ident; let bodies = self.variants.iter().map(|v| { let variant = &v.body.ident; let size = v.body.fields.iter().map(|f| f.size_fn()); let fields = v.body.fields.iter().map(|f| f.ident.as_ref().unwrap()); quote! { #id::#variant { #(#fields),* } => { #tag_size #(+ #size)* }, } }); quote! { #[allow(unused_assignments, unused_variables)] fn num_bytes(&self) -> usize { match self { #(#bodies)* } } } } } impl FlatSerializeStruct { fn alignment_check(&self, start: TokenStream2, min_align: TokenStream2) -> TokenStream2 { let checks = self.fields.iter().map(|f| f.alignment_check()); quote! 
{ { use std::mem::{align_of, size_of}; let mut current_size = #start; let mut min_align = #min_align; #(#checks)* } } } fn fn_trait_check(&self) -> TokenStream2 { let checks = self.fields.iter().map(|f| f.trait_check()); quote! { const _: () = { #(#checks)* }; } } fn fn_required_alignment(&self) -> TokenStream2 { let alignments = self.fields.iter().map(|f| f.required_alignment()); quote! { const REQUIRED_ALIGNMENT: usize = { use std::mem::align_of; let mut required_alignment = 1; #( let alignment = #alignments; if alignment > required_alignment { required_alignment = alignment; } )* required_alignment }; } } fn fn_max_provided_alignment(&self) -> TokenStream2 { let alignments = self.fields.iter().map(|f| f.max_provided_alignment()); quote! { const MAX_PROVIDED_ALIGNMENT: Option = { use std::mem::align_of; let mut min_align: Option = None; #( let ty_align = #alignments; match (ty_align, min_align) { (None, _) => (), (Some(align), None) => min_align = Some(align), (Some(align), Some(min)) if align < min => min_align = Some(align), _ => (), } )* match min_align { None => None, Some(min_align) => { let min_size = Self::MIN_LEN; if min_size % 8 == 0 && min_align >= 8 { Some(8) } else if min_size % 4 == 0 && min_align >= 4 { Some(4) } else if min_size % 2 == 0 && min_align >= 2 { Some(2) } else { Some(1) } }, } }; } } fn fn_min_len(&self) -> TokenStream2 { let sizes = self.fields.iter().map(|f| f.min_len()); quote! { const MIN_LEN: usize = { use std::mem::size_of; let mut size = 0; #(size += #sizes;)* size }; } } fn fn_try_ref(&self, lifetime: Option<&TokenStream2>) -> TokenStream2 { let break_label = syn::Lifetime::new("'tryref", proc_macro2::Span::call_site()); let id = &self.ident; let TryRefBody { vars, body, set_fields, err_size, } = self.fn_try_ref_body(&break_label); quote! 
{ #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn try_ref(mut input: & #lifetime [u8]) -> Result<(Self, & #lifetime [u8]), flat_serialize::WrapErr> { if input.len() < Self::MIN_LEN { return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN)) } let __packet_macro_read_len = 0usize; #vars #break_label: loop { #body let _ref = #id { #set_fields }; return Ok((_ref, input)) } Err(flat_serialize::WrapErr::NotEnoughBytes(0 #err_size)) } } } fn fn_try_ref_body(&self, break_label: &syn::Lifetime) -> TryRefBody { let field_names = self.fields.iter().map(|f| &f.ident); let ty1 = self.fields.iter().map(|f| f.local_ty()); let field1 = field_names.clone(); let field2 = field_names.clone(); let field_setters = self.fields.iter().map(|field| { let name = &field.ident; if field.is_optional() { quote! { #name } } else { quote! { #name.unwrap() } } }); let vars = quote!( #(let mut #field1: #ty1 = None;)* ); let try_wrap_fields = self.fields.iter().map(|f| f.try_wrap(break_label)); let body = quote! ( #(#try_wrap_fields)* ); let set_fields = quote!( #(#field2: #field_setters),* ); let err_size = self.fields.iter().map(|f| f.err_size()); let err_size = quote!( #( + #err_size)* ); TryRefBody { vars, body, set_fields, err_size, } } fn fn_fill_slice(&self) -> TokenStream2 { let id = &self.ident; let (fields, fill_slice_with) = self.fill_slice_body(); quote! { #[allow(unused_assignments, unused_variables)] #[inline(always)] unsafe fn fill_slice<'out>(&self, input: &'out mut [std::mem::MaybeUninit]) -> &'out mut [std::mem::MaybeUninit] { let total_len = self.num_bytes(); let (mut input, rem) = input.split_at_mut(total_len); let #id { #fields } = self; #fill_slice_with debug_assert_eq!(input.len(), 0); rem } } } fn fill_slice_body(&self) -> (TokenStream2, TokenStream2) { //FIXME assert multiple values of counters are equal... 
// fill_slice_body (continued): build the destructuring pattern and the
// per-field `fill_slice` statements consumed by `fn_fill_slice`.
let fill_slice_with = self.fields.iter().map(|f| f.fill_slice());
let fill_slice_with = quote!( #(#fill_slice_with);* );
let field = self.fields.iter().map(|f| f.ident.as_ref().unwrap());
let fields = quote!( #(#field),* );
(fields, fill_slice_with)
    }

    /// Generate the `num_bytes()` method: destructure `self` into its fields
    /// and sum each field's serialized size (see `FlatSerializeField::size_fn`).
    fn fn_len(&self) -> TokenStream2 {
        let size = self.fields.iter().map(|f| f.size_fn());
        let field = self.fields.iter().map(|f| f.ident.as_ref().unwrap());
        let id = &self.ident;
        quote! {
            #[allow(unused_assignments, unused_variables)]
            #[inline(always)]
            fn num_bytes(&self) -> usize {
                let #id { #(#field),* } = self;
                0usize #(+ #size)*
            }
        }
    }
}

// Per-field code generation. Each method below emits the token fragment for
// one field; the struct/enum-level generators above stitch them together.
impl FlatSerializeField {
    /// Emit the const-evaluated alignment check for this field: panics at
    /// compile time if the running offset (`current_size`) is not a multiple
    /// of the field type's REQUIRED_ALIGNMENT, or if an earlier
    /// variable-length field could have lowered the guaranteed alignment
    /// (`min_align`) below what this field requires.
    fn alignment_check(&self) -> TokenStream2 {
        let current_size = quote!(current_size);
        let min_align = quote!(min_align);
        match &self.length_info {
            // Fixed-length field: advances the running offset by MIN_LEN.
            None => {
                let ty = self.ty_without_lifetime();
                quote_spanned! {self.ty.span()=>
                    if (#current_size) % <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT != 0 {
                        panic!("unaligned field: the current size of the data is not a multiple of this type's alignment")
                    }
                    if <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > #min_align {
                        panic!("unaligned field: an earlier variable-length field could mis-align this field")
                    }
                    #current_size += <#ty as flat_serialize::FlatSerializable>::MIN_LEN;
                    #min_align = match <#ty as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT {
                        Some(align) if align < #min_align => align,
                        _ => #min_align,
                    };
                }
            }
            // Variable-length field: does not advance the offset (length is
            // unknown at compile time) but may lower `min_align`.
            Some(info) => {
                let ty = info.ty_without_lifetime();
                quote_spanned! {self.ty.span()=>
                    if (#current_size) % <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT != 0 {
                        panic!("unaligned field: the current size of the data is not a multiple of this type's alignment")
                    }
                    if <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT > #min_align {
                        panic!("unaligned field: an earlier variable-length field could mis-align this field")
                    }
                    if <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT < #min_align {
                        #min_align = <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT
                    }
                    #min_align = match <#ty as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT {
                        Some(align) if align < #min_align => align,
                        _ => #min_align,
                    };
                }
            }
        }
    }

    /// Emit a compile-time assertion that the field's type implements
    /// `FlatSerializable` (a zero-arg generic fn instantiated at the type).
    fn trait_check(&self) -> TokenStream2 {
        let (ty, needs_lifetime) = match (&self.ty_without_lifetime, &self.length_info) {
            (
                _,
                Some(VariableLenFieldInfo {
                    ty_without_lifetime: Some(ty),
                    ..
                }),
            ) => (ty.clone(), true),
            (_, Some(VariableLenFieldInfo { ty, .. })) => (quote! { #ty }, false),
            (Some(ty), _) => (ty.clone(), true),
            _ => {
                let ty = &self.ty;
                (quote! { #ty }, false)
            }
        };
        let lifetime = needs_lifetime.then(|| quote! { <'static> });
        let name = self.ident.as_ref().unwrap();
        // based on static_assertions
        // TODO add ConstLen assertion if type is in var-len position?
        quote_spanned! {self.ty.span()=>
            fn #name<'test, T: flat_serialize::FlatSerializable<'test>>() {}
            let _ = #name::<#ty #lifetime>;
        }
    }

    /// The alignment this field demands from its starting offset.
    fn required_alignment(&self) -> TokenStream2 {
        let ty = match &self.length_info {
            None => self.ty_without_lifetime(),
            Some(info) => info.ty_without_lifetime(),
        };
        quote_spanned! {self.ty.span()=>
            <#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT
        }
    }

    /// The maximum alignment this field can guarantee for whatever follows it
    /// (variable-length data can leave later fields less aligned).
    fn max_provided_alignment(&self) -> TokenStream2 {
        match &self.length_info {
            None => {
                let ty = self.ty_without_lifetime();
                quote_spanned! {self.ty.span()=>
                    <#ty as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT
                }
            }
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: true,
                    ..
                },
            ) => {
                let ty = info.ty_without_lifetime();
                // fields after an optional field cannot be aligned to more than
                // the field is in the event the field is present, so if the
                // field does not provide a max alignment (i.e. it's fixed-len)
                // use that to determine what the max alignment is.
                quote_spanned! {self.ty.span()=>
                    {
                        let ty_provied = <#ty as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT;
                        match ty_provied {
                            Some(align) => Some(align),
                            None => Some(<#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT),
                        }
                    }
                }
            }
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: false,
                    ..
                },
            ) => {
                let ty = info.ty_without_lifetime();
                // for variable length slices we only need to check the required
                // alignment, not the max-provided: TRIVIAL_COPY types won't
                // have a max-provided alignment, while other ones will be
                // padded out to their natural alignment.
                quote_spanned! {self.ty.span()=>
                    {
                        Some(<#ty as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT)
                    }
                }
            }
        }
    }

    /// Contribution to MIN_LEN: variable-length fields may be empty/absent,
    /// so only fixed-length fields contribute.
    fn min_len(&self) -> TokenStream2 {
        match &self.length_info {
            None => {
                let ty = self.ty_without_lifetime();
                quote_spanned! {self.ty.span()=>
                    <#ty as flat_serialize::FlatSerializable>::MIN_LEN
                }
            }
            Some(..) => quote_spanned! {self.ty.span()=>
                0
            },
        }
    }

    /// Emit the deserialization step for this field inside `try_ref`'s
    /// labeled loop: on short input it `break`s to the error path, on an
    /// invalid tag it returns immediately with the accumulated offset.
    fn try_wrap(&self, break_label: &syn::Lifetime) -> TokenStream2 {
        let ident = self.ident.as_ref().unwrap();
        match &self.length_info {
            // Slice field: length is computed from earlier fields.
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: false,
                    ..
                },
            ) => {
                let count = info.len_from_bytes();
                quote! {
                    {
                        let count = (#count) as usize;
                        let (field, rem) = match <_ as flat_serialize::VariableLen<'_>>::try_ref(input, count) {
                            Ok((f, b)) => (f, b),
                            Err(flat_serialize::WrapErr::InvalidTag(offset)) =>
                                return Err(flat_serialize::WrapErr::InvalidTag(__packet_macro_read_len + offset)),
                            Err(..) => break #break_label
                        };
                        input = rem;
                        #ident = Some(field);
                    }
                }
            }
            // Optional field: only read when the presence expression is true.
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: true,
                    ..
                },
            ) => {
                let is_present = info.len_from_bytes();
                let ty = info.ty_without_lifetime();
                quote! {
                    if #is_present {
                        let (field, rem) = match <#ty>::try_ref(input) {
                            Ok((f, b)) => (f, b),
                            Err(flat_serialize::WrapErr::InvalidTag(offset)) =>
                                return Err(flat_serialize::WrapErr::InvalidTag(__packet_macro_read_len + offset)),
                            Err(..) => break #break_label
                        };
                        input = rem;
                        #ident = Some(field);
                    }
                }
            }
            // Plain fixed-length field.
            None => {
                let ty = self.ty_without_lifetime();
                quote! {
                    {
                        let (field, rem) = match <#ty>::try_ref(input) {
                            Ok((f, b)) => (f, b),
                            Err(flat_serialize::WrapErr::InvalidTag(offset)) =>
                                return Err(flat_serialize::WrapErr::InvalidTag(__packet_macro_read_len + offset)),
                            Err(..) => break #break_label
                        };
                        input = rem;
                        #ident = Some(field);
                    }
                }
            }
        }
    }

    /// Emit the serialization step for this field inside `fill_slice`.
    fn fill_slice(&self) -> TokenStream2 {
        let ident = self.ident.as_ref().unwrap();
        match &self.length_info {
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: false,
                    ..
                },
            ) => {
                let count = info.counter_expr();
                // TODO this may not elide all bounds checks
                quote! {
                    unsafe {
                        let count = (#count) as usize;
                        input = <_ as flat_serialize::VariableLen<'_>>::fill_slice(#ident, count, input);
                    }
                }
            }
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: true,
                    ..
                },
            ) => {
                let is_present = info.counter_expr();
                let ty = &info.ty;
                quote! {
                    unsafe {
                        if #is_present {
                            let #ident: &#ty = #ident.as_ref().unwrap();
                            input = #ident.fill_slice(input);
                        }
                    }
                }
            }
            None => {
                quote! {
                    unsafe {
                        input = #ident.fill_slice(input);
                    }
                }
            }
        }
    }

    /// Contribution to the NotEnoughBytes error size reported by `try_ref`
    /// (a closure so the count/presence expressions are evaluated lazily).
    fn err_size(&self) -> TokenStream2 {
        match &self.length_info {
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: false,
                    ..
                },
            ) => {
                let count = info.err_size_expr();
                let ty = info.ty_without_lifetime();
                quote! {
                    (|| <#ty>::MIN_LEN * (#count) as usize)()
                }
            }
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: true,
                    ..
                },
            ) => {
                let is_present = info.err_size_expr();
                let ty = info.ty_without_lifetime();
                quote! {
                    (|| if #is_present { <#ty>::MIN_LEN } else { 0 })()
                }
            }
            None => {
                let ty = &self.ty_without_lifetime();
                quote! {
                    <#ty>::MIN_LEN
                }
            }
        }
    }

    /// The type as it appears in the generated public struct: slices become
    /// the type's SLICE associated type, optional fields become `Option<T>`.
    fn exposed_ty(&self, lifetime: Option<&TokenStream2>) -> TokenStream2 {
        match &self.length_info {
            None => {
                let nominal_ty = &self.ty;
                quote_spanned! {self.field.span()=>
                    #nominal_ty
                }
            }
            Some(VariableLenFieldInfo {
                is_optional: false,
                ty,
                ..
            }) => quote_spanned! {self.field.span()=>
                <#ty as flat_serialize::FlatSerializable<#lifetime>>::SLICE
            },
            Some(VariableLenFieldInfo {
                is_optional: true,
                ty,
                ..
            }) => {
                quote_spanned! {self.field.span()=>
                    Option<#ty>
                }
            }
        }
    }

    /// The type of the local used while deserializing: everything is wrapped
    /// in `Option` so `try_ref` can fill fields incrementally.
    fn local_ty(&self) -> TokenStream2 {
        match &self.length_info {
            None => {
                let ty = &self.ty;
                quote! { Option<#ty> }
            }
            Some(VariableLenFieldInfo {
                is_optional: false,
                ty,
                ..
            }) => {
                quote! { Option<<#ty as flat_serialize::FlatSerializable<'_>>::SLICE> }
            }
            Some(VariableLenFieldInfo {
                is_optional: true,
                ty,
                ..
            }) => {
                quote! { Option<#ty> }
            }
        }
    }

    /// Expression for this field's serialized size, used by `num_bytes()`.
    fn size_fn(&self) -> TokenStream2 {
        let ident = self.ident.as_ref().unwrap();
        match &self.length_info {
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: false,
                    ..
                },
            ) => {
                let count = info.counter_expr();
                quote! {
                    (<_ as flat_serialize::VariableLen<'_>>::num_bytes(#ident, (#count) as usize))
                }
            }
            Some(
                info @ VariableLenFieldInfo {
                    is_optional: true,
                    ..
                },
            ) => {
                let ty = self.ty_without_lifetime();
                let is_present = info.counter_expr();
                quote! {
                    (if #is_present {
                        <#ty as flat_serialize::FlatSerializable>::num_bytes(#ident.as_ref().unwrap())
                    } else {
                        0
                    })
                }
            }
            None => {
                let nominal_ty = self.ty_without_lifetime();
                quote!( <#nominal_ty as flat_serialize::FlatSerializable>::num_bytes(#ident) )
            }
        }
    }

    /// Statement that converts this field's borrowed data to owned in place.
    fn make_owned(&self) -> TokenStream2 {
        let ident = self.ident.as_ref().unwrap();
        match &self.length_info {
            Some(VariableLenFieldInfo {
                is_optional: false,
                ..
            }) => {
                quote! { flat_serialize::Slice::make_owned(#ident); }
            }
            Some(VariableLenFieldInfo {
                is_optional: true,
                ..
            }) => {
                let ty = self.ty_without_lifetime();
                quote! {
                    #ident.as_mut().map(|v| <#ty as flat_serialize::FlatSerializable>::make_owned(v));
                }
            }
            None => {
                let nominal_ty = self.ty_without_lifetime();
                quote!( <#nominal_ty as flat_serialize::FlatSerializable>::make_owned(#ident); )
            }
        }
    }

    /// Struct-literal fragment (`name: <converted>,`) for `into_owned`.
    #[allow(clippy::wrong_self_convention)]
    fn into_owned(&self) -> TokenStream2 {
        let ident = self.ident.as_ref().unwrap();
        match &self.length_info {
            Some(VariableLenFieldInfo {
                is_optional: false,
                ..
            }) => {
                quote! { #ident: flat_serialize::Slice::into_owned(#ident), }
            }
            Some(VariableLenFieldInfo {
                is_optional: true,
                ..
            }) => {
                let ty = self.ty_without_lifetime();
                quote! { #ident: #ident.map(|v| <#ty as flat_serialize::FlatSerializable>::into_owned(v)), }
            }
            None => {
                let nominal_ty = self.ty_without_lifetime();
                quote!( #ident: <#nominal_ty as flat_serialize::FlatSerializable>::into_owned(#ident), )
            }
        }
    }

    /// The field declaration (attributes, optional `pub`, name and exposed
    /// type) as it appears in the generated struct definition.
    fn declaration<'a, 'b: 'a>(
        &'b self,
        is_pub: bool,
        lifetime: Option<&TokenStream2>,
        pf_attrs: impl Iterator + 'a,
    ) -> TokenStream2 {
        let name = self.ident.as_ref().unwrap();
        let attrs = self.attrs.iter();
        let pub_marker = is_pub.then(|| quote! { pub });
        let ty = self.exposed_ty(lifetime);
        let per_field_attrs = self.per_field_attrs(pf_attrs);
        quote! {
            #(#per_field_attrs)*
            #(#attrs)*
            #pub_marker #name: #ty,
        }
    }

    /// Select which of the `#[flat_serialize::field_attr(..)]` attributes
    /// applies to this field: the `fixed` one for fixed-length fields, the
    /// (optional) `variable` one otherwise.
    fn per_field_attrs<'a, 'b: 'a>(
        &'b self,
        attrs: impl Iterator + 'a,
    ) -> impl Iterator + 'a {
        attrs.map(move |attr| match &self.length_info {
            None => {
                let attr = &attr.fixed;
                quote! { #attr }
            }
            Some(_) => match &attr.variable {
                Some(attr) => quote! { #attr },
                None => quote! {},
            },
        })
    }

    /// The field's type with lifetimes erased to turbofish form when one was
    /// precomputed; falls back to the nominal type.
    fn ty_without_lifetime(&self) -> TokenStream2 {
        match &self.ty_without_lifetime {
            None => {
                let ty = &self.ty;
                quote! { #ty }
            }
            Some(ty) => ty.clone(),
        }
    }

    /// True when the field is a `field if <expr>` optional field.
    fn is_optional(&self) -> bool {
        matches!(
            self.length_info,
            Some(VariableLenFieldInfo {
                is_optional: true,
                ..
            })
        )
    }
}

impl VariableLenFieldInfo {
    /// Same as `FlatSerializeField::ty_without_lifetime`, for the element
    /// type of a variable-length field.
    fn ty_without_lifetime(&self) -> TokenStream2 {
        match &self.ty_without_lifetime {
            None => {
                let ty = &self.ty;
                quote!
{ #ty } } Some(ty) => ty.clone(), } } } #[proc_macro_derive(FlatSerializable)] pub fn flat_serializable_derive(input: TokenStream) -> TokenStream { let input: syn::DeriveInput = syn::parse(input).unwrap(); let name = input.ident; let s = match input.data { syn::Data::Enum(e) => { let repr: Vec<_> = input .attrs .iter() .flat_map(|attr| { let meta = match attr.parse_meta() { Ok(meta) => meta, _ => return None, }; let has_repr = meta.path().get_ident().is_some_and(|id| id == "repr"); if !has_repr { return None; } attr.parse_args().ok().and_then(|ident: Ident| { if ident == "u8" || ident == "u16" || ident == "u32" || ident == "u64" { return Some(ident); } None }) }) .collect(); if repr.len() != 1 { return quote_spanned! {e.enum_token.span()=> compile_error!{"FlatSerializable only allowed on #[repr(u..)] enums without variants"} }.into(); } let all_unit = e .variants .iter() .all(|variant| matches!(variant.fields, syn::Fields::Unit)); if !all_unit { return quote_spanned! {e.enum_token.span()=> compile_error!{"FlatSerializable only allowed on until enums"} } .into(); } let variant = e.variants.iter().map(|v| &v.ident); let variant2 = variant.clone(); let const_name = variant.clone(); let repr = &repr[0]; let out = quote! 
{ unsafe impl<'i> flat_serialize::FlatSerializable<'i> for #name { const MIN_LEN: usize = std::mem::size_of::(); const REQUIRED_ALIGNMENT: usize = std::mem::align_of::(); const MAX_PROVIDED_ALIGNMENT: Option = None; const TRIVIAL_COPY: bool = true; type SLICE = flat_serialize::Slice<'i, #name>; type OWNED = Self; #[inline(always)] #[allow(non_upper_case_globals)] unsafe fn try_ref(input: &'i [u8]) -> Result<(Self, &'i [u8]), flat_serialize::WrapErr> { let size = std::mem::size_of::(); if input.len() < size { return Err(flat_serialize::WrapErr::NotEnoughBytes(size)) } let (field, rem) = input.split_at(size); let field = field.as_ptr().cast::<#repr>(); #( const #const_name: #repr = #name::#variant2 as #repr; )* let field = field.read_unaligned(); let field = match field { #(#variant => #name::#variant,)* _ => return Err(flat_serialize::WrapErr::InvalidTag(0)), }; Ok((field, rem)) } #[inline(always)] unsafe fn fill_slice<'out>(&self, input: &'out mut [std::mem::MaybeUninit]) -> &'out mut [std::mem::MaybeUninit] { let size = std::mem::size_of::(); let (input, rem) = input.split_at_mut(size); let bytes = (self as *const Self).cast::>(); let bytes = std::slice::from_raw_parts(bytes, size); input.copy_from_slice(bytes); rem } #[inline(always)] fn num_bytes(&self) -> usize { std::mem::size_of::() } #[inline(always)] fn make_owned(&mut self) { // nop } #[inline(always)] fn into_owned(self) -> Self::OWNED { self } } }; return out.into(); } syn::Data::Union(u) => { return quote_spanned! 
{u.union_token.span()=> compile_error!("FlatSerializable not allowed on unions") } .into() } syn::Data::Struct(s) => s, }; let num_reprs = input .attrs .iter() .flat_map(|attr| { let meta = match attr.parse_meta() { Ok(meta) => meta, _ => return None, }; let has_repr = meta.path().get_ident().is_some_and(|id| id == "repr"); if !has_repr { return None; } attr.parse_args().ok().and_then(|ident: Ident| { if ident == "C" { return Some(ident); } None }) }) .count(); if num_reprs != 1 { return quote_spanned! {s.struct_token.span()=> compile_error!{"FlatSerializable only allowed on #[repr(C)] structs"} } .into(); } let s = FlatSerializeStruct { per_field_attrs: Default::default(), attrs: Default::default(), ident: name, lifetime: None, fields: s .fields .into_iter() .map(|f| FlatSerializeField { field: f, ty_without_lifetime: None, length_info: None, }) .collect(), }; let ident = &s.ident; let alignment_check = s.alignment_check(quote!(0), quote!(8)); let trait_check = s.fn_trait_check(); let required_alignment = s.fn_required_alignment(); let max_provided_alignment = s.fn_max_provided_alignment(); let min_len = s.fn_min_len(); let try_ref = s.fn_try_ref(None); let fill_slice = s.fn_fill_slice(); let len = s.fn_len(); // FIXME add check that all values are TRIVIAL_COPY let out = quote! 
{ // alignment assertions #[allow(unused_assignments)] const _: () = #alignment_check; #trait_check unsafe impl<'a> flat_serialize::FlatSerializable<'a> for #ident { #required_alignment #max_provided_alignment #min_len const TRIVIAL_COPY: bool = true; type SLICE = flat_serialize::Slice<'a, #ident>; type OWNED = Self; #try_ref #fill_slice #len #[inline(always)] fn make_owned(&mut self) { // nop } #[inline(always)] fn into_owned(self) -> Self::OWNED { self } } }; out.into() } ================================================ FILE: crates/flat_serialize/flat_serialize_macro/src/parser.rs ================================================ use std::{collections::HashSet, ops::Deref}; use proc_macro2::TokenStream as TokenStream2; use syn::{ braced, parse::{Parse, ParseStream}, spanned::Spanned, token, visit::Visit, Attribute, Expr, Field, Ident, Result, Token, Type, }; use crate::{ FlatSerialize, FlatSerializeEnum, FlatSerializeField, FlatSerializeStruct, FlatSerializeVariant, PerFieldsAttr, VariableLenFieldInfo, }; use quote::{quote, quote_spanned}; const LIBRARY_MARKER: &str = "flat_serialize"; fn flat_serialize_attr_path(att_name: &str) -> syn::Path { let crate_name = quote::format_ident!("{}", LIBRARY_MARKER); let att_name = quote::format_ident!("{}", att_name); syn::parse_quote! 
{ #crate_name :: #att_name } } impl Parse for FlatSerialize { fn parse(input: ParseStream) -> Result { let attrs = input.call(Attribute::parse_outer)?; let field_attr_path = flat_serialize_attr_path("field_attr"); let (per_field_attrs, attrs): (Vec<_>, _) = attrs .into_iter() .partition(|attr| attr.path == field_attr_path); let per_field_attrs: Result<_> = per_field_attrs .into_iter() .map(|a| a.parse_args_with(PerFieldsAttr::parse)) .collect(); let per_field_attrs = per_field_attrs?; let lookahead = input.lookahead1(); //TODO Visibility if lookahead.peek(Token![struct]) { input.parse().map(|mut s: FlatSerializeStruct| { s.per_field_attrs = per_field_attrs; s.attrs = attrs; FlatSerialize::Struct(s) }) } else if lookahead.peek(Token![enum]) { input.parse().map(|mut e: FlatSerializeEnum| { e.per_field_attrs = per_field_attrs; e.attrs = attrs; FlatSerialize::Enum(e) }) } else { Err(lookahead.error()) } } } impl Parse for FlatSerializeStruct { fn parse(input: ParseStream) -> Result { let content; let _struct_token: Token![struct] = input.parse()?; let ident = input.parse()?; let mut lifetime = None; if input.peek(Token![<]) { let _: Token![<] = input.parse()?; lifetime = Some(input.parse()?); let _: Token![>] = input.parse()?; } let _brace_token: token::Brace = braced!(content in input); let mut fields = content.parse_terminated(FlatSerializeField::parse)?; validate_self_fields(fields.iter_mut()); Ok(Self { per_field_attrs: vec![], attrs: vec![], ident, lifetime, fields, }) } } impl Parse for FlatSerializeEnum { fn parse(input: ParseStream) -> Result { let content; let _enum_token: Token![enum] = input.parse()?; let ident = input.parse()?; let mut lifetime = None; if input.peek(Token![<]) { let _: Token![<] = input.parse()?; lifetime = Some(input.parse()?); let _: Token![>] = input.parse()?; } let _brace_token: token::Brace = braced!(content in input); let tag = Field::parse_named(&content)?; let _comma_token: Token![,] = content.parse()?; let variants = 
content.parse_terminated(FlatSerializeVariant::parse)?;
        Ok(Self {
            per_field_attrs: vec![],
            attrs: vec![],
            ident,
            lifetime,
            tag: FlatSerializeField {
                field: tag,
                // TODO can we allow these?
                ty_without_lifetime: None,
                length_info: None,
            },
            variants,
        })
    }
}

// Parse one enum variant: `Name: <tag value> { fields... }`. The variant body
// is validated like a struct (self-field references must be defined earlier).
impl Parse for FlatSerializeVariant {
    fn parse(input: ParseStream) -> Result {
        let content;
        let ident = input.parse()?;
        let _colon_token: Token![:] = input.parse()?;
        let tag_val = input.parse()?;
        let _brace_token: token::Brace = braced!(content in input);
        let mut fields = content.parse_terminated(FlatSerializeField::parse)?;
        validate_self_fields(fields.iter_mut());
        Ok(Self {
            tag_val,
            body: FlatSerializeStruct {
                per_field_attrs: vec![],
                attrs: vec![],
                ident,
                lifetime: None,
                fields,
            },
        })
    }
}

// Parse a single field. Beyond a normal named field this recognizes:
// - a `#[flat_serialize::flatten]` attribute (stripped from the attr list),
// - `field: Ty if <expr>` — an optional field present when <expr> is true,
// - `field: [Elem; <expr using self.*>]` — a variable-length slice.
impl Parse for FlatSerializeField {
    fn parse(input: ParseStream) -> Result {
        let mut field = Field::parse_named(input)?;
        // TODO switch to `drain_filter()` once stable
        let path = flat_serialize_attr_path("flatten");
        let mut use_trait = false;
        field.attrs.retain(|attr| {
            let is_flatten = attr.path == path;
            if is_flatten {
                use_trait = true;
                return false;
            }
            true
        });
        let mut length_info = None;
        if input.peek(Token![if]) {
            // `field: Ty if <expr>` — optional field.
            let _: Token![if] = input.parse()?;
            let expr = input.parse()?;
            length_info = Some(VariableLenFieldInfo {
                ty: field.ty.clone(),
                ty_without_lifetime: None,
                len_expr: expr,
                is_optional: true,
            });
        } else if let syn::Type::Array(array) = &field.ty {
            // `[Elem; <expr>]` whose length mentions `self.*` is a
            // variable-length slice; plain const-length arrays pass through.
            let has_self = has_self_field(&array.len);
            if has_self {
                // let self_fields_are_valid = validate_self_field(&array.len, &seen_fields);
                length_info = Some(VariableLenFieldInfo {
                    ty: (*array.elem).clone(),
                    ty_without_lifetime: None,
                    len_expr: array.len.clone(),
                    is_optional: false,
                });
            }
        }
        // Precompute a lifetime-erased (turbofish) form of the type when it
        // carries a lifetime, for use in generated const/codegen contexts.
        let mut ty_without_lifetime = None;
        if has_lifetime(&field.ty) {
            match &mut length_info {
                None => ty_without_lifetime = Some(as_turbofish(&field.ty)),
                Some(info) => {
                    info.ty_without_lifetime = Some(as_turbofish(&info.ty));
                }
            }
        }
        Ok(Self {
            field,
            ty_without_lifetime,
            length_info,
        })
    }
}

// TODO should we leave this in?
impl Deref for FlatSerializeField {
    type Target = Field;
    fn deref(&self) -> &Self::Target {
        &self.field
    }
}

// Parse `#[flat_serialize::field_attr(fixed = "...", variable = "...")]`:
// a mandatory `fixed = "<attr>"` and an optional `variable = "<attr>"`, each
// a string literal containing exactly one attribute.
impl Parse for PerFieldsAttr {
    fn parse(input: ParseStream) -> Result {
        let fixed: syn::MetaNameValue = input.parse()?;
        let mut variable: Option = None;
        if !input.is_empty() {
            let _comma_token: Token![,] = input.parse()?;
            if !input.is_empty() {
                variable = Some(input.parse()?)
            }
            // allow a trailing comma
            if !input.is_empty() {
                let _comma_token: Token![,] = input.parse()?;
            }
        }
        if !fixed.path.is_ident("fixed") {
            return Err(syn::Error::new(fixed.path.span(), "expected `fixed`"));
        }
        if !variable
            .as_ref()
            .map(|v| v.path.is_ident("variable"))
            .unwrap_or(true)
        {
            return Err(syn::Error::new(
                variable.unwrap().path.span(),
                "expected `variable`",
            ));
        }
        let fixed = match &fixed.lit {
            syn::Lit::Str(fixed) => {
                let mut fixed_attrs = fixed.parse_with(Attribute::parse_outer)?;
                if fixed_attrs.len() != 1 {
                    return Err(syn::Error::new(
                        fixed.span(),
                        "must contain exactly one attribute",
                    ));
                }
                fixed_attrs.pop().unwrap()
            }
            _ => {
                return Err(syn::Error::new(
                    fixed.lit.span(),
                    "must contain exactly one attribute",
                ))
            }
        };
        let variable = match variable {
            None => None,
            Some(variable) => match &variable.lit {
                syn::Lit::Str(variable) => {
                    let mut variable_attrs = variable.parse_with(Attribute::parse_outer)?;
                    if variable_attrs.len() != 1 {
                        return Err(syn::Error::new(
                            variable.span(),
                            "must contain exactly one attribute",
                        ));
                    }
                    Some(variable_attrs.pop().unwrap())
                }
                _ => {
                    return Err(syn::Error::new(
                        variable.lit.span(),
                        "must contain exactly one attribute",
                    ))
                }
            },
        };
        Ok(Self { fixed, variable })
    }
}

/// Does `expr` mention `self` anywhere (e.g. `self.len + 1`)?
fn has_self_field(expr: &Expr) -> bool {
    let mut has_self = FindSelf(false);
    has_self.visit_expr(expr);
    has_self.0
}

struct FindSelf(bool);

impl<'ast> Visit<'ast> for FindSelf {
    // NOTE(review): this override does not call the default
    // `syn::visit::visit_path_segment`, so path segments nested inside
    // generic arguments are not descended into — confirm this is intended.
    fn visit_path_segment(&mut self, i: &'ast syn::PathSegment) {
        if self.0 {
            return;
        }
        self.0 |= i.ident == "self"
    }
}

/// validate that all references to a field in the struct (e.g. `len` in
/// `[u8; self.len + 1]`) contained in expression refers to already defined
/// fields. Otherwise output an "attempting to use field before definition"
/// compile error. This is used to ensure that we don't generate structs that
/// are impossible to deserialize because fields are in ambiguous positions such
/// as
/// ```skip
/// struct {
///     variable: [u8; self.len],
///     len: u32,
/// }
/// ```
/// where the position of `len` depends on the value of `len`.
fn validate_self_fields<'a>(fields: impl Iterator) {
    let mut seen_fields = HashSet::new();
    for f in fields {
        if let Some(length_info) = &mut f.length_info {
            // On error, replace the length expression with the
            // `compile_error!` tokens so the user sees a spanned diagnostic.
            if let Err(error) = validate_self_field(&length_info.len_expr, &seen_fields) {
                length_info.len_expr = syn::parse2(error).unwrap()
            }
        }
        seen_fields.insert(f.ident.as_ref().unwrap());
    }
}

/// Check a single length expression against the set of already-seen fields;
/// `Err` carries the `compile_error!` token stream to emit.
fn validate_self_field(
    expr: &Expr,
    seen_fields: &HashSet<&Ident>,
) -> std::result::Result<(), TokenStream2> {
    let mut validate_fields = ValidateLenFields(None, seen_fields);
    validate_fields.visit_expr(expr);
    match validate_fields.0 {
        Some(error) => Err(error),
        None => Ok(()),
    }
}

// Visitor: `.0` holds the first error found, `.1` the fields defined so far.
struct ValidateLenFields<'a, 'b>(Option, &'b HashSet<&'a Ident>);

impl<'ast> Visit<'ast> for ValidateLenFields<'_, '_> {
    fn visit_expr(&mut self, expr: &'ast syn::Expr) {
        if self.0.is_some() {
            return;
        }
        match expr {
            syn::Expr::Field(field) => {
                if let syn::Expr::Path(path) = &*field.base {
                    if path.path.segments[0].ident == "self" {
                        let name = match &field.member {
                            syn::Member::Named(name) => name.clone(),
                            syn::Member::Unnamed(_) => panic!("unnamed fields not supported"),
                        };
                        if !self.1.contains(&name) {
                            self.0 = Some(quote_spanned! {name.span()=>
                                compile_error!("attempting to use field before definition")
                            })
                        }
                    }
                }
            }
            _ => syn::visit::visit_expr(self, expr),
        }
    }
}

/// Render a (path) type with its generic arguments stripped so it can be used
/// in expression/const position; non-path types yield a `compile_error!`.
pub fn as_turbofish(ty: &Type) -> TokenStream2 {
    let path = match &ty {
        Type::Path(path) => path,
        _ => {
            return quote_spanned!
{ty.span()=> compile_error!("can only flatten path-based types") } } }; if path.qself.is_some() { return quote_spanned! {ty.span()=> compile_error!("cannot use `` in flatten") }; } let path = &path.path; let leading_colon = &path.leading_colon; let mut output = quote! {}; let mut error = None; for segment in &path.segments { match &segment.arguments { syn::PathArguments::Parenthesized(args) => { error = Some(quote_spanned! {args.span()=> compile_error!("cannot use `()` in flatten") }); } syn::PathArguments::None => { if output.is_empty() { output = quote! { #leading_colon #segment }; } else { output = quote! { #output::#segment}; } } syn::PathArguments::AngleBracketed(_) => { let ident = &segment.ident; if output.is_empty() { // TODO leave in args? // output = quote!{ #leading_colon #ident::#args }; output = quote! { #leading_colon #ident }; } else { // TODO leave in args? // output = quote!{ #output::#ident::#args }; output = quote! { #output::#ident }; } } } } if let Some(error) = error { return error; } output } pub fn has_lifetime(ty: &Type) -> bool { struct Visitor(bool); impl<'ast> Visit<'ast> for Visitor { fn visit_lifetime(&mut self, _: &'ast syn::Lifetime) { self.0 = true } } let mut visit = Visitor(false); syn::visit::visit_type(&mut visit, ty); visit.0 } ================================================ FILE: crates/hyperloglogplusplus/Cargo.toml ================================================ [package] name = "hyperloglogplusplus" version = "0.1.0" edition = "2021" [dependencies] serde = { version = "1.0", features = ["derive"] } encodings = { path="../encodings" } [dev-dependencies] fnv = "1.0.3" quickcheck = "1" quickcheck_macros = "1" [features] default = [] flaky_tests = [] ================================================ FILE: crates/hyperloglogplusplus/src/dense.rs ================================================ use crate::hyperloglog_data::{ BIAS_DATA_OFFSET, BIAS_DATA_VEC, RAW_ESTIMATE_DATA_OFFSET, RAW_ESTIMATE_DATA_VEC, THRESHOLD_DATA_OFFSET, 
THRESHOLD_DATA_VEC,
};
use crate::{registers::Registers, Extractable};

/// Dense HyperLogLog++ storage: one 6-bit-style register per index, with the
/// index taken from the top `precision` bits of a hash.
#[derive(Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)]
pub struct Storage<'s> {
    pub registers: Registers<'s>,
    // TODO can be derived from block.len()
    // number of non-index bits (64 - precision); also the shift that isolates
    // the register index
    index_shift: u8,
    pub precision: u8,
    // mask selecting the non-index (low) bits of a hash
    hash_mask: u64,
}

impl<'s> Storage<'s> {
    /// Create an empty dense storage.
    ///
    /// Panics if `precision` is outside `[4, 18]`.
    pub fn new(precision: u8) -> Self {
        // TODO what is max precision
        assert!(
            (4..=18).contains(&precision),
            "invalid value for precision: {precision}; must be within [4, 18]",
        );
        let non_index_bits = 64 - precision;
        Self {
            registers: Registers::new(precision),
            index_shift: non_index_bits,
            precision,
            hash_mask: (1 << non_index_bits) - 1,
        }
    }

    /// Rebuild a borrowed storage from raw register bytes and a precision.
    /// No validation is performed here; callers must pass matching parts.
    pub fn from_parts(registers: &'s [u8], precision: u8) -> Self {
        let non_index_bits = 64 - precision;
        Self {
            registers: Registers::from_raw(registers),
            index_shift: non_index_bits,
            precision,
            hash_mask: (1 << non_index_bits) - 1,
        }
    }

    /// Deep-copy into a `'static` (owned) storage.
    pub fn into_owned(&self) -> Storage<'static> {
        Storage {
            registers: self.registers.into_owned(),
            index_shift: self.index_shift,
            precision: self.precision,
            hash_mask: self.hash_mask,
        }
    }

    /// Record one 64-bit hash value.
    pub fn add_hash(&mut self, hash: u64) {
        let (idx, count) = self.idx_count_from_hash(hash);
        self.registers.set_max(idx, count);
    }

    /// Record a value already encoded by the sparse representation.
    pub fn add_encoded(&mut self, encoded: crate::sparse::Encoded) {
        let (idx, count) = self.idx_count_from_encoded(encoded);
        self.registers.set_max(idx, count);
    }

    /// Split a hash into (register index, leading-zero-based rank).
    fn idx_count_from_hash(&self, hash: u64) -> (usize, u8) {
        let idx = hash.extract(63, self.precision);
        // w in the paper
        let hash_bits = hash.extract_bits(63 - self.precision, 0);
        let count = hash_bits.q() - self.precision;
        (idx as usize, count)
    }

    /// Same as `idx_count_from_hash`, but starting from a sparse encoding.
    // NOTE(review): the `25 - self.precision` shift assumes the sparse
    // encoding indexes with precision 25 — confirm against sparse.rs.
    fn idx_count_from_encoded(&self, encoded: crate::sparse::Encoded) -> (usize, u8) {
        let old_idx = encoded.idx();
        let idx = old_idx >> (25 - self.precision);
        let count = encoded.count(self.precision);
        (idx as usize, count)
    }

    /// HLL++ cardinality estimate: raw estimate with empirical bias
    /// correction for small counts, falling back to linear counting while any
    /// register is still zero, with the switch point taken from the
    /// per-precision threshold table.
    pub fn estimate_count(&self) -> u64 {
        let num_zeros = self.registers.count_zeroed_registers();
        let sum: f64 = self
            .registers
            .iter()
            .map(|v| 2.0f64.powi(-(v as i32)))
            .sum();
        let m = (1 << self.precision) as f64;
        let a_m = self.a_m();
        // raw harmonic-mean estimate
        let e = a_m * m.powi(2) / sum;
        // bias-corrected estimate, only applied in the small-range regime
        let e_p = if e <= 5.0 * m {
            e - self.estimate_bias(e)
        } else {
            e
        };
        let h = if num_zeros != 0 {
            self.linear_counting(num_zeros as f64)
        } else {
            e_p
        };
        if h <= self.threshold() {
            h as u64
        } else {
            e_p as u64
        }
    }

    /// Linear-counting estimate given `v` zeroed registers.
    fn linear_counting(&self, v: f64) -> f64 {
        let m = (1 << self.precision) as f64;
        m * (m / v).ln()
    }

    /// Empirical cutover threshold for this precision.
    fn threshold(&self) -> f64 {
        THRESHOLD_DATA_VEC[self.precision as usize - THRESHOLD_DATA_OFFSET] as f64
    }

    /// The alpha_m constant from the HyperLogLog estimator.
    fn a_m(&self) -> f64 {
        let size = 1 << self.precision;
        let m = size as f64;
        match size {
            16 => 0.673,
            32 => 0.697,
            64 => 0.709,
            _ => 0.7213 / (1.0 + 1.079 / m),
        }
    }

    /// Interpolate the bias correction for `estimate` from the six nearest
    /// entries of the per-precision raw-estimate/bias tables, weighting each
    /// neighbor by inverse distance.
    fn estimate_bias(&self, estimate: f64) -> f64 {
        use Bounds::*;
        let raw_estimates = RAW_ESTIMATE_DATA_VEC[self.precision as usize - RAW_ESTIMATE_DATA_OFFSET];
        let bias_data = BIAS_DATA_VEC[self.precision as usize - BIAS_DATA_OFFSET];
        let start = raw_estimates.binary_search_by(|v| v.partial_cmp(&estimate).unwrap());
        let mut bounds = match start {
            // exact hit: no interpolation needed
            Ok(i) => return bias_data[i],
            Err(0) => Right(0),
            Err(i) if i == raw_estimates.len() => Left(i - 1),
            Err(i) => Both(i - 1, i),
        };
        let mut neighbors = [0; 6];
        let mut distances = [0.0; 6];
        for i in 0..6 {
            let (idx, distance) = bounds.next_closest(estimate, raw_estimates);
            neighbors[i] = idx;
            distances[i] = distance;
        }
        // inverse-distance weights, normalized to sum to 1
        for distance in &mut distances {
            *distance = 1.0 / *distance;
        }
        let total: f64 = distances.iter().sum();
        for distance in &mut distances {
            *distance /= total;
        }
        let mut value = 0.0;
        for i in 0..6 {
            value += distances[i] * bias_data[neighbors[i]];
        }
        return value;

        // Cursor over the table while expanding outward from the insertion
        // point: Left/Right walk one direction only, Both tracks candidates
        // on each side.
        enum Bounds {
            Left(usize),
            Right(usize),
            Both(usize, usize),
        }

        impl Bounds {
            // find the closest neighbor to `estimate` in `raw_estimates` and update self
            // NOTE(review): in the Left/Right arms the returned distance is
            // computed at the post-step index (`*i` after `-= 1`/`+= 1`)
            // while `idx` is the pre-step index, and `Left` can underflow
            // when `*i == 0` — confirm whether this mismatch is intended;
            // the exact estimate values in the tests below depend on it.
            fn next_closest(&mut self, estimate: f64, raw_estimates: &[f64]) -> (usize, f64) {
                match self {
                    Left(i) => {
                        let idx = *i;
                        *i -= 1;
                        (idx, (raw_estimates[*i] - estimate).abs())
                    }
                    Right(i) => {
                        let idx = *i;
                        *i += 1;
                        (idx, (raw_estimates[*i] - estimate).abs())
                    }
                    Both(l, r) => {
                        let left_delta = (raw_estimates[*l] - estimate).abs();
                        let right_delta = (raw_estimates[*r] - estimate).abs();
                        if right_delta < left_delta {
                            let idx = *r;
                            if *r < raw_estimates.len() - 1 {
                                *r += 1;
                                return (idx, right_delta);
                            }
                            // right side exhausted: continue leftward only
                            *self = Left(*l);
                            (idx, right_delta)
                        } else {
                            let idx = *l;
                            if *l > 0 {
                                *l -= 1;
                                return (idx, left_delta);
                            }
                            // left side exhausted: continue rightward only
                            *self = Right(*r);
                            (idx, left_delta)
                        }
                    }
                }
            }
        }
    }

    /// Merge another dense storage into this one by taking the per-register
    /// maximum. Panics if precisions or register lengths differ.
    pub fn merge_in(&mut self, other: &Storage<'_>) {
        assert!(
            self.precision == other.precision,
            "precision must be equal (left={}, right={})",
            self.precision,
            other.precision
        );
        assert!(
            self.registers.bytes().len() == other.registers.bytes().len(),
            "registers length must be equal (left={}, right={})",
            self.registers.bytes().len(),
            other.registers.bytes().len(),
        );
        // TODO this is probably inefficient
        for (i, r) in other.registers.iter().enumerate() {
            self.registers.set_max(i, r)
        }
    }

    /// Size of the register array in bytes.
    pub fn num_bytes(&self) -> usize {
        self.registers.byte_len()
    }
}

#[cfg(test)]
mod tests {
    use fnv::FnvHasher;

    use crate::sparse::Encoded;

    use super::*;
    use std::{
        collections::HashSet,
        hash::{Hash, Hasher},
    };

    // FNV hash helper used by all tests below.
    pub fn hash(val: V) -> u64 {
        let mut hasher = FnvHasher::default();
        val.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    #[should_panic(expected = "invalid value for precision: 3; must be within [4, 18]")]
    fn new_panics_b3() {
        Storage::new(3);
    }

    #[test]
    fn new_works_b4() {
        Storage::new(4);
    }

    #[test]
    fn new_works_b18() {
        Storage::new(18);
    }

    #[test]
    #[should_panic(expected = "invalid value for precision: 19; must be within [4, 18]")]
    fn new_panics_b19() {
        Storage::new(19);
    }

    #[test]
    fn empty() {
        assert_eq!(Storage::new(8).estimate_count(), 0);
    }

    #[test]
    fn add_b4_n1k() {
        let mut hll = Storage::new(4);
        for i in 0..1000 {
            hll.add_hash(hash(i));
        }
        // FIXME examine in more detail
        assert_eq!(hll.estimate_count(), 96);
    }

    #[test]
    fn add_b8_n1k() {
        let mut hll = Storage::new(8);
        for i in 0..1000 {
            hll.add_hash(hash(i));
        }
        // FIXME examine in more detail
// NOTE(review): this chunk begins mid-way through a test whose header lies
// before the visible range; the stray assertion below is its tail.
        assert_eq!(hll.estimate_count(), 430);
    }

    // The following `add_*` tests pin regression values: HyperLogLog is an
    // approximate counter, so the expected counts are the (stable) estimates
    // produced by this implementation for the given precision and input size,
    // not the true cardinalities.

    #[test]
    fn add_b12_n1k() {
        let mut hll = Storage::new(12);
        for i in 0..1000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 1146);
    }

    #[test]
    fn add_b16_n1k() {
        let mut hll = Storage::new(16);
        for i in 0..1000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 1007);
    }

    #[test]
    fn add_b8_n10k() {
        let mut hll = Storage::new(8);
        for i in 0..10000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 10536);
    }

    #[test]
    fn add_b12_n10k() {
        let mut hll = Storage::new(12);
        for i in 0..10000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 11347);
    }

    #[test]
    fn add_b16_n10k() {
        let mut hll = Storage::new(16);
        for i in 0..10000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 10850);
    }

    #[test]
    fn add_b16_n100k() {
        let mut hll = Storage::new(16);
        for i in 0..100000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 117304);
    }

    #[test]
    fn add_b16_n1m() {
        let mut hll = Storage::new(16);
        for i in 0..1000000 {
            hll.add_hash(hash(i));
        }
        assert_eq!(hll.estimate_count(), 882644);
    }

    // A clone must be an independent snapshot: mutating the original afterwards
    // must not change the clone's estimate.
    #[test]
    fn clone() {
        let mut hll1 = Storage::new(12);
        for i in 0..500 {
            hll1.add_hash(hash(i));
        }
        let c1a = hll1.estimate_count();
        let hll2 = hll1.clone();
        assert_eq!(hll2.estimate_count(), c1a);
        for i in 501..1000 {
            hll1.add_hash(hash(i));
        }
        let c1b = hll1.estimate_count();
        assert_ne!(c1b, c1a);
        assert_eq!(hll2.estimate_count(), c1a);
    }

    // Merging two sketches built from disjoint halves of the input must give
    // exactly the same estimate as one sketch built from the full input.
    #[test]
    fn merge() {
        let mut hll1 = Storage::new(12);
        let mut hll2 = Storage::new(12);
        let mut hll = Storage::new(12);
        for i in 0..500 {
            hll.add_hash(hash(i));
            hll1.add_hash(hash(i));
        }
        for i in 501..1000 {
            hll.add_hash(hash(i));
            hll2.add_hash(hash(i));
        }
        assert_ne!(hll.estimate_count(), hll1.estimate_count());
        assert_ne!(hll.estimate_count(), hll2.estimate_count());
        hll1.merge_in(&hll2);
        assert_eq!(hll.estimate_count(), hll1.estimate_count());
    }

    // Merging sketches of different precisions is unsupported and must panic
    // with the exact message asserted here.
    #[test]
    #[should_panic(expected = "precision must be equal (left=5, right=12)")]
    fn merge_panics_p() {
        let mut hll1 = Storage::new(5);
        let hll2 = Storage::new(12);
        hll1.merge_in(&hll2);
    }

    // Regression test for issue #74: this particular data set previously made
    // estimate_count() panic at the lowest precision (4).
    #[test]
    fn issue_74() {
        let panic_data = vec![
            "ofr-1-1517560282779878449",
            "ofr-1-1517589543534331019",
            "ofr-1-1517590532450550786",
            "ofr-1-1517644560121333465",
            "ofr-1-1517746611185649116",
            "ofr-1-1518051376300950677",
            "ofr-1-1518484387459892414",
            "ofr-1-1518488008830355319",
            "ofr-1-1518488407814571264",
            "ofr-1-1518561818180978525",
            "ofr-1-1518678274740717330",
            "ofr-1-1519461045930165638",
            "ofr-1-1519470647696557288",
            "ofr-1-1519567114956309703",
            "ofr-1-1519653616441755584",
            "ofr-1-1519655049912256356",
            "ofr-1-1520105514088138521",
            "ofr-1-1520294225822221822",
            "ofr-1-1520319017418884884",
            "ofr-1-1520505982893295286",
            "ofr-1-1520553027150677707",
            "ofr-1-1520925550686111649",
            "ofr-1-1520927095122167663",
            "ofr-1-1521290010424640726",
            "ofr-1-1521458659554886917",
            "ofr-1-1521943577454052994",
            "ofr-1-1521971260753839540",
            "ofr-1-1522000670785668758",
            "ofr-1-1522043914876749176",
            "ofr-1-1522206531944580201",
            "ofr-1-1522234960069920034",
            "ofr-1-1522333169901504119",
            "ofr-1-1522363887846294936",
            "ofr-1-1522484446749918495",
            "ofr-1-1522600458059122179",
            "ofr-1-1522687450205783676",
            "ofr-1-1522765602785461678",
            "ofr-1-1522815395559769187",
            "ofr-1-1522839112893465736",
            "ofr-1-1523001178903151627",
            "ofr-1-1523018056414397988",
            "ofr-1-1523096555609261412",
            "ofr-1-1523103371222189143",
            "ofr-1-1523256333918667890",
            "ofr-1-1523270427746895732",
            "ofr-1-1523411745695466681",
            "ofr-1-1523630566301631536",
            "ofr-1-1523839014553388093",
            "ofr-1-1523894230803940925",
            "ofr-1-1523931915564221543",
            "ofr-1-1524104734332815100",
            "ofr-1-1524113364834715372",
            "ofr-1-1524209603273164167",
            "ofr-1-1524276802153219312",
            "ofr-1-1524554894791804305",
            "ofr-1-1524621894100584193",
        ];
        let mut hll = Storage::new(4);
        for entry in &panic_data {
            hll.add_hash(hash(entry));
        }
        // Only checks that this does not panic; the estimate itself is unused.
        hll.estimate_count();
    }

    // NOTE(review): the extraction that produced this dump stripped generic
    // arguments here (`HashSet`, `collect::>()`); restored as `HashSet<u64>` /
    // `collect::<HashSet<_>>()` — confirm against upstream.
    #[quickcheck]
    fn quick_16(values: HashSet<u64>) -> quickcheck::TestResult {
        let mut hll = Storage::new(16);
        let expected = values.iter().collect::<HashSet<_>>().len() as f64;
        for value in values {
            hll.add_hash(value);
        }
        let estimated = hll.estimate_count() as f64;
        let error = 0.01 * expected;
        // quickcheck instantly finds hash collisions, so we can only check that
        // we underestimate the cardinality
        if estimated <= expected + error {
            return quickcheck::TestResult::passed();
        }
        println!("got {}, expected {} +- {}", estimated, expected, error);
        quickcheck::TestResult::failed()
    }

    #[cfg(feature = "flaky_tests")]
    #[quickcheck]
    fn quick_8(values: Vec<u64>) -> quickcheck::TestResult {
        let mut hll = Storage::new(8);
        let expected = values.iter().collect::<HashSet<_>>().len() as f64;
        for value in values {
            hll.add_hash(value);
        }
        let estimated = hll.estimate_count() as f64;
        let error = 0.10 * expected;
        // quickcheck instantly finds hash collisions, so we can only check that
        // we underestimate the cardinality
        if estimated <= expected + error {
            return quickcheck::TestResult::passed();
        }
        println!("got {}, expected {} +- {}", estimated, expected, error);
        quickcheck::TestResult::failed()
    }

    // Decoding an index/count pair straight from a hash and via the Encoded
    // sparse representation must agree for every input hash.
    #[quickcheck]
    fn quick_decode_16(value: u64) -> bool {
        let hll = Storage::new(8);
        let from_hash = hll.idx_count_from_hash(value);
        let from_encoded = hll.idx_count_from_encoded(Encoded::from_hash(value, hll.precision));
        if from_hash != from_encoded {
            println!(
                "{:#x}, expected {:?}, got {:?}",
                value, from_hash, from_encoded
            );
            return false;
        }
        true
    }
}

================================================
FILE: crates/hyperloglogplusplus/src/hyperloglog_data.rs
================================================
// based on https://github.com/crepererum/pdatastructs.rs/blob/e4f49e6462187700b9a12e8301df9a72a0c6e58c/src/hyperloglog_data.rs
// and https://goo.gl/iU8Ig

#![allow(clippy::unreadable_literal)]

// Thresholds indexed by precision b (starting at b = THRESHOLD_DATA_OFFSET):
// below the threshold the bias-corrected raw estimate is used.
pub(crate) const THRESHOLD_DATA_OFFSET: usize = 4;
pub(crate) const THRESHOLD_DATA_VEC: &[usize] = &[
    10,    // b = 4
    20,    // b = 5
    40,    // b = 6
    80,    // b = 7
    220,   // b = 8
    400,   // b = 9
    900,   // b = 10
    1800,  // b = 11
    3100,  // b = 12
    6500,  // b = 13
    11500, // b = 14
    20000, // b = 15
    50000, // b = 16
120000, // b = 17 350000, // b = 18 ]; pub(crate) const RAW_ESTIMATE_DATA_OFFSET: usize = 4; pub(crate) const RAW_ESTIMATE_DATA_VEC: &[&[f64]] = &[ // precision 4 &[ 11., 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394, ], // precision 5 &[ 23., 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 62.2076, 63.1024, 63.8908, 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 
124.8854, 125.705, 126.4652, 128.3464, 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, 155.807, 156.9228, 157.0372, 158.5852, ], // precision 6 &[ 46., 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 106.4184, 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 179.0186, 180.4518, 181.927, 183.4172, 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 
276.2202, 278.3726, 280.3344, 281.9284, 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, 310.8352, 313.9684, 315.837, 316.7796, 318.9858, ], // precision 7 &[ 92., 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, 355.301, 357.139, 360.658, 363.1732, 366.5902, 369.9538, 373.0828, 375.922, 378.9902, 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, 552.493, 555.2722, 557.335, 562.449, 
564.2014, 569.0738, 571.0974, 574.8564, 578.2996, 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, 638.6102, ], // precision 8 &[ 184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, 213.2074, 216.4612, 220.27, 223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 744.7418, 751.2192, 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 
1127.252, 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, 1236.5768, 1241.363, 1249.4074, 1254.6566, 1260.8014, 1266.5454, 1274.5192, ], // precision 9 &[ 369., 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 1416.9098, 1428.9728, 1440.9228, 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, 1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 
2118.9124, 2132.301, 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 2436.7592, 2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, 2553.768, ], // precision 10 &[ 738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, 2384.0264, 2408.3834, 2430.1544, 2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, 3742.6224, 3770.4456, 3795.6602, 3819.9058, 
3844.002, 3869.517, 3895.6824, 3920.8622, 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, 4755.4174, 4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828, ], // precision 11 &[ 1477., 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, 3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 4185.544, 4229.5976, 4274.583, 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 
6894.2372, 6945.3864, 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, 9419.7126, 9474.3722, 9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, 10177.5244, 10229.9176, ], // precision 12 &[ 2954., 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, 6686.9144, 6766.1142, 6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, 11530.741, 11627.6136, 11726.311, 
11821.5964, 11918.837, 12015.3724, 12113.0162, 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, 17816.758, 17920.1748, 18012.9236, 18119.7984, 18223.2248, 18324.2482, 18426.6276, 18525.0932, 18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22, ], // precision 13 &[ 5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, 17634.4254, 
17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 35011.13, 35218.669, 35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424, ], // precision 14 &[ 11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, 24226.9224, 24535.581, 
24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 32794.767, 33145.204, 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 68420.0388, 68821.22, 69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884, ], // precision 15 &[ 23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, 29605.5138, 30066.8668, 30534.2344, 
31006.32, 31480.778, 31962.2418, 32447.3324, 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 108695.2266, 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, 131971.1962, 132787.5458, 133588.1056, 134431.351, 135220.2906, 136023.398, 136846.6558, 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 
148237.9784, 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842, ], // precision 16 &[ 47271., 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 152743.6104, 154213.0948, 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, 227005.7928, 
228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, 259154.772, 260777.625, 262390.253, 264004.4906, 265643.59, 267255.4076, 268873.426, 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, 325847.1542, ], // precision 17 &[ 94542., 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 161098.159, 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, 179412.502, 181774.035, 184151.939, 186551.6895, 188965.691, 191402.8095, 193857.949, 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, 311392.581, 314347.538, 317319.4295, 320285.9785, 
323301.7325, 326298.3235, 329301.3105, 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, 508550.9915, 511806.7305, 515026.776, 518217.0005, 521523.9855, 524705.9855, 527950.997, 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, 645139.9405, 648441.546, 651666.252, 654941.845, ], // precision 18 &[ 189084., 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 
422762.865, 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, 750313.39, 756504.185, 762712.579, 768876.985, 775167.859, 781359., 787615.959, 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 1088426.025, 1094776.005, 1101327.448, 1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691, ], ]; pub(crate) const BIAS_DATA_OFFSET: usize = 4; pub(crate) const BIAS_DATA_VEC: &[&[f64]] = &[ // precision 4 &[ 10., 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, 5.0396, 4.6766, 4.3566, 4.0454, 
3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, -1.29859999999999, -1.607, -1.3292, -1.7606, ], // precision 5 &[ 22., 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, 0.132199999999997, 0.394400000000005, -0.241200000000006, 
0.242000000000004, 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, -0.285600000000002, 0.311400000000006, -0.128399999999999, -0.754999999999995, -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, -0.262599999999992, 0.163399999999996, -0.100599999999986, -0.39500000000001, -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, -0.0772000000000048, -0.962799999999987, -0.414800000000014, ], // precision 6 &[ 45., 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, 
5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, 1.67920000000001, 0.856999999999999, 0.448599999999999, 1.2362, 0.953399999999988, 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, 0.756599999999992, 0.256599999999992, -0.837600000000009, 0.620000000000005, 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, 0.120600000000024, 0.769000000000005, 0.870799999999974, 0.66500000000002, 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, 0.779600000000016, 0.985799999999983, ], // precision 7 &[ 
91., 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, 3.68220000000002, 3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, 1.60659999999996, 
2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996, ], // precision 8 &[ 183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 55.4152, 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, 30.434, 29.6644, 28.4894, 27.6312, 26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, 5.76599999999996, 6.75139999999999, 
5.57759999999996, 3.73220000000003, 5.8048, 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 2.34339999999997, 0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, 0.801400000000058, -0.454600000000028, 1.51919999999996, ], // precision 9 &[ 368., 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, 70.1444, 
68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 10.5938000000001, 9.59719999999993, 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, -2.14439999999991, -1.96659999999997, -3.97939999999994, -0.604400000000169, -3.08260000000018, -3.39159999999993, -5.29640000000018, -5.38920000000007, -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, -8.6561999999999, -6.99940000000015, -10.5473999999999, 
-7.34139999999979, -6.80999999999995, -6.29719999999998, -6.23199999999997, ], // precision 10 &[ 737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 36.4546, 33.7849999999999, 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, 
4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 5.13639999999987, 5.98500000000013, 2.47719999999981, 2.61999999999989, 1.62800000000016, 4.65000000000009, 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, -7.67420000000038, -10.6328000000003, -9.81720000000041, ], // precision 11 &[ 1476., 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, 
98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, 9.45499999999993, 12.9441999999999, 3.91139999999996, 13.1373999999996, 5.44720000000052, 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, 0.135399999999208, 2.15940000000046, 0.766999999999825, 1.0594000000001, 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, -0.262199999999211, -2.34460000000036, -2.8078000000005, -3.22179999999935, -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, -10.4755999999998, -10.0823999999993, ], // precision 12 &[ 2953., 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 
1846.4776, 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, 67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, 21.3245999999999, 26.4115999999995, 23.4521999999997, 19.5013999999992, 19.8513999999996, 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, 3.82819999999992, 0.826199999999517, 
8.07440000000133, -1.59080000000176, 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988, ], // precision 13 &[ 5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, 2583.9916, 2523.4662, 2464.124, 2406.073, 2347.0362, 2292.1006, 2238.1716, 2182.7514, 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 1193.2972, 1156.3578, 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, 322.529399999999, 311.613799999999, 295.848000000002, 
289.273000000001, 274.093000000001, 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, 56.9818000000014, 47.5338000000011, 42.9928, 51.1591999999982, 37.2740000000013, 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, -47.620600000002, -39.0855999999985, -33.1434000000008, -46.1371999999974, -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, -37.7575999999972, ], // precision 14 &[ 11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, 5290.0274, 5166.723, 5047.6906, 4929.162, 
4815.1406, 4699.127, 4588.5606, 4477.7394, 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, 1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, 182.995600000002, 185.5268, 164.547400000003, 176.5962, 150.689599999998, 157.8004, 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, 89.6042000000016, 95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, -8.79940000000352, -5.97860000000219, -14.4048000000039, 
-3.4143999999942, -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038, ], // precision 15 &[ 23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, 3573.6274, 3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 
725.271200000003, 692.216, 671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, 265.586800000005, 258.994399999996, 223.915999999997, 215.925399999993, 213.503800000006, 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, -109.715800000005, ], // precision 16 &[ 47270., 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, 16228.4602, 15823.3474, 15428.6974, 
15043.0284, 14667.6278, 14297.4588, 13935.2882, 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, 791.1054, 756.899999999994, 704.343200000003, 672.495599999995, 622.790399999998, 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, 451.350799999986, 443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, -12.3331999999937, 
-50.6565999999875, -59.9538000000175, -65.1054000000004, -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001, ], // precision 17 &[ 94541., 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, 13504.509, 13099.3885, 12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 
3079.70000000001, 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, 1084.17300000001, 1124.16450000001, 1060.69949999999, 1007.48849999998, 941.194499999983, 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, -42.9745000000112, -107.364999999991, -189.839000000036, -162.611499999999, -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, -415.748000000021, -417.155000000028, ], // precision 18 &[ 189083., 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, 61742.115, 
60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, 8657.98499999999, 8395.85900000005, 8033., 7736.95900000003, 7430.59699999995, 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 1911.18999999994, 1818.27099999995, 1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, -397.590000000084, -434.006000000052, 
-403.475000000093, -297.958000000101, -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, -713.308999999892, ], ]; ================================================ FILE: crates/hyperloglogplusplus/src/lib.rs ================================================ #[cfg(test)] extern crate quickcheck; #[cfg(test)] #[macro_use(quickcheck)] extern crate quickcheck_macros; use std::{ hash::{BuildHasher, Hash}, marker::PhantomData, }; pub mod dense; mod hyperloglog_data; pub mod registers; pub mod sparse; #[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)] pub struct HyperLogLog<'s, T: ?Sized, B> { storage: HyperLogLogStorage<'s>, pub buildhasher: B, _pd: PhantomData, } #[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)] pub enum HyperLogLogStorage<'s> { Sparse(sparse::Storage<'s>), Dense(dense::Storage<'s>), } impl<'s, T, B> HyperLogLog<'s, T, B> { pub fn new(precision: u8, buildhasher: B) -> Self { Self { storage: HyperLogLogStorage::Sparse(sparse::Storage::new(precision)), buildhasher, _pd: PhantomData, } } pub fn from_sparse_parts( bytes: &'s [u8], num_compressed: u64, precision: u8, buildhasher: B, ) -> Self { Self { storage: HyperLogLogStorage::Sparse(sparse::Storage::from_parts( bytes, num_compressed, precision, )), buildhasher, _pd: PhantomData, } } pub fn from_dense_parts(bytes: &'s [u8], precision: u8, buildhasher: B) -> Self { Self { storage: HyperLogLogStorage::Dense(dense::Storage::from_parts(bytes, precision)), buildhasher, _pd: PhantomData, } } pub fn estimate_count(&mut self) -> u64 { use HyperLogLogStorage::*; match &mut self.storage { Sparse(s) => s.estimate_count(), Dense(s) => s.estimate_count(), } } pub fn immutable_estimate_count(&self) -> u64 { use HyperLogLogStorage::*; match &self.storage { Sparse(s) => s.immutable_estimate_count(), Dense(s) => 
s.estimate_count(), } } pub fn is_sparse(&self) -> bool { use HyperLogLogStorage::*; matches!(&self.storage, Sparse(..)) } pub fn num_bytes(&self) -> usize { use HyperLogLogStorage::*; match &self.storage { Sparse(s) => s.num_bytes(), Dense(s) => s.num_bytes(), } } pub fn to_parts(&mut self) -> &HyperLogLogStorage<'s> { self.merge_all(); &self.storage } pub fn merge_all(&mut self) { match &mut self.storage { HyperLogLogStorage::Sparse(s) => s.merge_buffers(), HyperLogLogStorage::Dense(_) => {} } } pub fn into_owned(&self) -> HyperLogLog<'static, T, B> where B: Clone, { use HyperLogLogStorage::*; let storage = match &self.storage { Sparse(s) => Sparse(s.into_owned()), Dense(s) => Dense(s.into_owned()), }; HyperLogLog { storage, buildhasher: self.buildhasher.clone(), _pd: PhantomData, } } } impl HyperLogLog<'_, T, B> where T: Hash + ?Sized, B: BuildHasher, { pub fn add(&mut self, value: &T) { use HyperLogLogStorage::*; let hash = self.buildhasher.hash_one(value); match &mut self.storage { Sparse(s) => { let overflowing = s.add_hash(hash); if overflowing { let dense = s.to_dense(); self.storage = Dense(dense); } } Dense(s) => s.add_hash(hash), } } pub fn merge_in(&mut self, other: &HyperLogLog<'_, T, B>) { use HyperLogLogStorage::*; match (&mut self.storage, &other.storage) { (Sparse(s), Sparse(o)) => { let overflowing = s.merge_in(o); if overflowing { let dense = s.to_dense(); self.storage = Dense(dense); } } (Sparse(s), Dense(o)) => { let mut dense = s.to_dense(); dense.merge_in(o); self.storage = Dense(dense); } (Dense(s), Sparse(o)) => s.merge_in(&o.immutable_to_dense()), (Dense(s), Dense(o)) => s.merge_in(o), } } } pub(crate) trait Extractable: Sized + Copy + std::ops::Shl + std::ops::Shr { const NUM_BITS: u8; fn extract_bits(&self, high: u8, low: u8) -> Self { self.extract(high, high - low + 1) } fn extract(&self, high: u8, len: u8) -> Self { (*self << (Self::NUM_BITS - 1 - high)) >> (Self::NUM_BITS - len) } fn q(&self) -> u8; } impl Extractable for u64 { const 
NUM_BITS: u8 = 64; fn q(&self) -> u8 { self.leading_zeros() as u8 + 1 } } impl Extractable for u32 { const NUM_BITS: u8 = 32; fn q(&self) -> u8 { self.leading_zeros() as u8 + 1 } } pub fn error_for_precision(precision: u8) -> f64 { 1.04 / 2.0f64.powi(precision.into()).sqrt() } pub fn precision_for_error(max_error: f64) -> u8 { // error = 1.04/sqrt(number_of_registers) // error*sqrt(number_of_registers) = 1.04 // sqrt(number_of_registers) = 1.04/error // number_of_registers = (1.04/error)^2 let num_registers = (1.04f64 / max_error).powi(2); let precision = num_registers.log2().ceil(); if !(4.0..=18.0).contains(&precision) { panic!("derived precision is not valid, error should be in the range [0.26, 0.00203125]") } precision as u8 } #[cfg(test)] mod tests { use std::collections::HashSet; use fnv::FnvBuildHasher; use quickcheck::TestResult; use super::*; #[test] fn test_asc_4_10k() { let mut hll = HyperLogLog::new(4, FnvBuildHasher::default()); for i in 0..10_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 11113); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 13); assert!(hll.num_bytes() <= (1 << 4) * 6 / 8 + 1); } #[test] fn test_asc_4_100k() { let mut hll = HyperLogLog::new(4, FnvBuildHasher::default()); for i in 0..100_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 108_048); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 13); assert!(hll.num_bytes() <= (1 << 4) * 6 / 8 + 1); } #[test] fn test_asc_4_500k() { let mut hll = HyperLogLog::new(4, FnvBuildHasher::default()); for i in 0..500_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 425_701); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 13); assert!(hll.num_bytes() <= (1 << 4) * 6 / 8 + 1); } #[test] fn test_asc_8_10k() { let mut hll = HyperLogLog::new(8, FnvBuildHasher::default()); for i in 0..10_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 10_536); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 193); assert!(hll.num_bytes() <= (1 << 8) * 6 / 8 + 1); } 
#[test] fn test_asc_8_100k() { let mut hll = HyperLogLog::new(8, FnvBuildHasher::default()); for i in 0..100_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 121_578); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 193); assert!(hll.num_bytes() <= (1 << 8) * 6 / 8 + 1); } #[test] fn test_asc_8_500k() { let mut hll = HyperLogLog::new(8, FnvBuildHasher::default()); for i in 0..500_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 517_382); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 193); assert!(hll.num_bytes() <= (1 << 8) * 6 / 8 + 1); } #[test] fn test_asc_16_10k() { let mut hll = HyperLogLog::new(16, FnvBuildHasher::default()); for i in 0..10_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 10_001); assert!(hll.is_sparse()); assert_eq!(hll.num_bytes(), 23_181); assert!(hll.num_bytes() <= (1 << 16) * 6 / 8 + 1) } #[test] fn test_asc_16_100k() { let mut hll = HyperLogLog::new(16, FnvBuildHasher::default()); for i in 0..100_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 117_304); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 49_153); assert!(hll.num_bytes() <= (1 << 16) * 6 / 8 + 1) } #[test] fn test_asc_16_500k() { let mut hll = HyperLogLog::new(16, FnvBuildHasher::default()); for i in 0..500_000 { hll.add(&i); } assert_eq!(hll.estimate_count(), 510_445); assert!(!hll.is_sparse()); assert_eq!(hll.num_bytes(), 49_153); assert!(hll.num_bytes() <= (1 << 16) * 6 / 8 + 1) } #[quickcheck] fn quick_hll_16(values: HashSet) -> TestResult { let mut hll = HyperLogLog::new(16, FnvBuildHasher::default()); let expected = values.len() as f64; for value in values { hll.add(&value); } let estimated = hll.estimate_count() as f64; let error = 0.0005 * expected; if expected - error <= estimated && estimated <= expected + error { return TestResult::passed(); } if expected - 10.0 <= estimated && estimated <= expected + 10.0 { return TestResult::passed(); } println!("got {}, expected {} +- {}", estimated, expected, error); 
TestResult::failed() } #[quickcheck] fn quick_merge_hll_16(values_a: Vec, values_b: Vec) { let mut hll_a = HyperLogLog::new(16, FnvBuildHasher::default()); let mut baseline = HyperLogLog::new(16, FnvBuildHasher::default()); for value in values_a { hll_a.add(&value); baseline.add(&value) } let mut hll_b = HyperLogLog::new(16, FnvBuildHasher::default()); for value in values_b { hll_b.add(&value); baseline.add(&value) } hll_a.merge_all(); hll_b.merge_in(&hll_a); assert_eq!(hll_b.estimate_count(), baseline.estimate_count()) } // FIXME needs hash collision check #[cfg(feature = "flaky_tests")] #[quickcheck] fn quick_merge_hll_8(values_a: Vec, values_b: Vec) { let mut hll_a = HyperLogLog::new(8, FnvBuildHasher::default()); let mut baseline = HyperLogLog::new(8, FnvBuildHasher::default()); for value in &values_a { hll_a.add(value); baseline.add(value) } let mut hll_b = HyperLogLog::new(8, FnvBuildHasher::default()); for value in &values_b { hll_b.add(value); baseline.add(value) } hll_a.merge_all(); hll_b.merge_in(&hll_a); let estimate = hll_b.estimate_count(); let baseline = baseline.estimate_count(); // FIXME // if there's a hash collision between the elements unique to a and b // the counts could be off slightly, check if there is in fact such a // collision if estimate > baseline + 5 || estimate < baseline.saturating_sub(6) { panic!("{} != {}", estimate, baseline) } } #[quickcheck] fn quick_merge_hll_4(values_a: Vec, values_b: Vec) { let mut hll_a = HyperLogLog::new(4, FnvBuildHasher::default()); let mut baseline = HyperLogLog::new(4, FnvBuildHasher::default()); for value in values_a { hll_a.add(&value); baseline.add(&value) } let mut hll_b = HyperLogLog::new(4, FnvBuildHasher::default()); for value in values_b { hll_b.add(&value); baseline.add(&value) } hll_a.merge_all(); hll_b.merge_in(&hll_a); assert_eq!(hll_b.estimate_count(), baseline.estimate_count()) } #[test] fn precision_for_error() { for precision in 4..=18 { assert_eq!( 
super::precision_for_error(super::error_for_precision(precision)), precision ) } } } ================================================ FILE: crates/hyperloglogplusplus/src/registers.rs ================================================ use std::{borrow::Cow, convert::TryInto, debug_assert}; /// array of 6bit registers, of power-of-2 size // 24 is the LCM of 6 and 8, so we can divide our registers into // blocks of 24 bits and only deal with whole registers as follows: // // b b b b b b|b b // b b b b|b b b b // b b|b b b b b b // // (3 bytes makes 4 whole registers) // We can turn this into a 32bit block like so // // b b b b b b|b b // b b b b|b b b b // b b|b b b b b b // 0 0 0 0 0 0 0 0 // // and treat the block like a regular integer, using shifts to get the // values in and out #[derive(Clone, serde::Serialize, serde::Deserialize, PartialEq, Eq)] pub struct Registers<'s>(Cow<'s, [u8]>); impl<'s> Registers<'s> { /// allocate a new Registers of size `2^exponent` pub fn new(exponent: u8) -> Self { assert!((4..=64).contains(&exponent)); let num_registers: i128 = 1 << exponent; let num_bits = num_registers * 6; // store an additional byte at the end so we can always use 16-bit reads // exhaustive search of the [4, 64] parameter space shows that this // formula works correctly let num_bytes = (num_bits / 8) + 1; let mut bytes = vec![0u8; num_bytes as usize]; // set the extra byte to 0xff so we don't count it as 0 if let Some(byte) = bytes.last_mut() { *byte = 0xff; } Self(bytes.into()) } pub fn from_raw(bytes: &'s [u8]) -> Self { Self(bytes.into()) } #[cfg(test)] pub fn at(&self, idx: usize) -> u8 { // TODO switch chunks_exact_mut() to as_chunks_mut() once stable? 
let block_num = idx / 4; let idx_in_block = idx % 4; let block = self.0.chunks_exact(3).nth(block_num).unwrap(); let block = u32::from_be_bytes([block[0], block[1], block[2], 0x0]); let value = block >> (8 + 6 * (3 - idx_in_block)); (value & 0x3f) as u8 } pub fn set_max(&mut self, idx: usize, value: u8) { debug_assert!(value < (1 << 6)); let block_num = idx / 4; let idx_in_block = idx % 4; // TODO switch chunks_exact_mut() to as_chunks_mut() once stable? let (a, b, c) = match self.0.to_mut().chunks_exact_mut(3).nth(block_num) { Some([a, b, c, ..]) => (a, b, c), _ => panic!( "index {} out of bounds of {} registers", idx, (self.0.len() - 1) / 3 * 4, ), }; let block = u32::from_be_bytes([*a, *b, *c, 0x0]); let shift = 8 // extra 0 byte at the end + 6 * (3 - idx_in_block); // idx 0 is at the largest offset, so it needs the greatest shift let mask = 0x3f << shift; let value = (value as u32) << shift; let old_value = block & mask; if old_value < value { let block = (block & !mask) | value; let [new_a, new_b, new_c, _] = u32::to_be_bytes(block); *a = new_a; *b = new_b; *c = new_c; } } pub fn bytes(&self) -> &[u8] { &self.0 } pub fn count_zeroed_registers(&self) -> u64 { self.iter().filter(|&b| b == 0).count() as u64 } pub fn iter(&self) -> impl Iterator + '_ { use std::iter::once; // our length should be divisible by 3, plus an extra byte we add debug_assert_eq!(self.0.len() % 3, 1); self.0.chunks_exact(3).flat_map(|bytes| { const LOW_REG_MASK: u32 = (1 << 6) - 1; let [a, b, c]: [u8; 3] = bytes.try_into().unwrap(); let block = u32::from_be_bytes([a, b, c, 0x0]); // TODO replace with // ``` // std::array::IntoIter::new([ // ((block >> 26) & LOW_REG_MASK) as u8, // ((block >> 20) & LOW_REG_MASK) as u8, // ((block >> 14) & LOW_REG_MASK) as u8, // ((block >> 8) & LOW_REG_MASK) as u8, // ]) // ``` // once std::array::IntoIter becomes stable once(((block >> 26) & LOW_REG_MASK) as u8) .chain(once(((block >> 20) & LOW_REG_MASK) as u8)) .chain(once(((block >> 14) & LOW_REG_MASK) 
as u8)) .chain(once(((block >> 8) & LOW_REG_MASK) as u8)) }) } pub fn byte_len(&self) -> usize { self.0.len() } pub fn merge(a: &Registers<'_>, b: &Registers<'_>) -> Self { if a.0.len() != b.0.len() { panic!( "different register size in merge: {} != {}", a.0.len(), b.0.len() ) } let registers: Vec = (&*a.0).into(); let mut merged = Registers(registers.into()); for (i, v) in b.iter().enumerate() { merged.set_max(i, v); } merged } pub fn into_owned(&self) -> Registers<'static> { Registers(Cow::from(self.0.clone().into_owned())) } } #[cfg(test)] mod test { use super::*; #[test] fn test_last_index_not_clobbered() { for i in 4..14 { let mut regs = Registers::new(i); let read = regs.at((i - 1) as _); assert!(read == 0, "{}: {} = {}", i, read, 0); regs.set_max((i - 1) as _, 0xf); let read = regs.at((i - 1) as _); assert!(read == 0xf, "{}: {} = {}", i, read, 0xf); if i > 1 { let read = regs.at((i - 2) as _); assert!(read == 0, "{}: {} = {}", i, read, 0); regs.set_max((i - 2) as _, 0x3f); let read = regs.at((i - 2) as _); assert!(read == 0x3f, "{}: {} = {}", i, read, 0x3f); let read = regs.at((i - 1) as _); assert!(read == 0xf, "{}: {} = {}", i, read, 0xf); } } } #[test] fn test_last_index_not_clobbers() { for i in 4..14 { let mut regs = Registers::new(i); let read = regs.at((i - 2) as _); assert!(read == 0, "{}: {} = {}", i, read, 0); regs.set_max((i - 2) as _, 0x3c); let read = regs.at((i - 2) as _); assert!(read == 0x3c, "{}: {} = {}", i, read, 0x3c); let read = regs.at((i - 1) as _); assert!(read == 0, "{}: {} = {}", i, read, 0); let read = regs.at((i - 1) as _); assert!(read == 0, "{}: {} = {}", i, read, 0); regs.set_max((i - 1) as _, 0x3f); let read = regs.at((i - 1) as _); assert!(read == 0x3f, "{}: {} = {}", i, read, 0x3f); if i > 1 { let read = regs.at((i - 2) as _); assert!(read == 0x3c, "{}: {} = {}", i, read, 0x3c); } } } #[test] fn test_count_empty() { assert_eq!(Registers::new(4).count_zeroed_registers(), 16); } #[test] fn test_count_4() { let registers = 
Registers::new(4); assert_eq!(registers.count_zeroed_registers(), 16); } #[test] fn test_count_5() { let registers = Registers::new(5); assert_eq!(registers.count_zeroed_registers(), 32); } #[test] fn test_count_6() { let registers = Registers::new(6); assert_eq!(registers.count_zeroed_registers(), 64); } #[test] fn test_count_7() { let registers = Registers::new(7); assert_eq!(registers.count_zeroed_registers(), 128); } #[test] fn test_iter_4_0_1() { let mut registers = Registers::new(4); registers.set_max(0, 1); let values: Vec<_> = registers.iter().collect(); let mut expected = [0; 16]; expected[0] = 1; assert_eq!(values, expected); } #[quickcheck] fn quick_test(exp: u8, ops: Vec<(usize, u8)>) -> quickcheck::TestResult { use quickcheck::TestResult; use std::cmp::max; if !(4..=16).contains(&exp) { return TestResult::discard(); } let size = 1 << exp; let mut reference = vec![0; size]; let mut registers = Registers::new(exp); for (idx, val) in ops { let fixed_idx = idx % size; let val = val & 0x3f; reference[fixed_idx] = max(val, reference[fixed_idx]); registers.set_max(fixed_idx, val); } let mut expected_count = 0; for (idx, val) in reference.iter().enumerate() { if registers.at(idx) != *val { return TestResult::failed(); } if *val == 0 { expected_count += 1; } } let expeceted_len = reference.len(); let mut actual_len = 0; for (i, (a, b)) in reference.iter().zip(registers.iter()).enumerate() { if *a != b { println!("value mismatch @ {}, expected {}, got {}", i, a, b,); return TestResult::failed(); } actual_len += 1 } if expeceted_len != actual_len { println!( "iter len mismatch, expected {}, got {}", expeceted_len, actual_len, ); return TestResult::failed(); } let actual_count = registers.count_zeroed_registers(); if actual_count != expected_count { println!( "count mismatch, expected {}, got {}", expected_count, actual_count, ); return TestResult::failed(); } TestResult::passed() } #[quickcheck] fn quick_merge( exp: u8, ops_a: Vec<(usize, u8)>, ops_b: Vec<(usize, 
u8)>, ) -> quickcheck::TestResult { use quickcheck::TestResult; if !(4..=16).contains(&exp) { return TestResult::discard(); } let size = 1 << exp; let mut reference = Registers::new(exp); let mut a = Registers::new(exp); for (idx, val) in ops_a { let fixed_idx = idx % size; let val = val & 0x3f; a.set_max(fixed_idx, val); reference.set_max(fixed_idx, val); } let mut b = Registers::new(exp); for (idx, val) in ops_b { let fixed_idx = idx % size; let val = val & 0x3f; b.set_max(fixed_idx, val); reference.set_max(fixed_idx, val); } let merged = Registers::merge(&a, &b); assert_eq!(&*merged.0, &*reference.0); TestResult::passed() } } ================================================ FILE: crates/hyperloglogplusplus/src/sparse/varint.rs ================================================ use std::borrow::Cow; use encodings::{delta, prefix_varint}; use super::Encoded; pub fn decompression_iter<'a>( Compressed(bytes): &'a Compressed<'_>, ) -> impl Iterator + 'a { prefix_varint::u64_decompressor(bytes) .map(delta::u64_decoder()) .map(|v| Encoded(v as u32)) } #[derive(Default, serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)] pub struct Compressed<'c>(Cow<'c, [u8]>); impl<'c> Compressed<'c> { pub fn from_raw(bytes: &'c [u8]) -> Self { Self(bytes.into()) } pub fn bytes(&self) -> &[u8] { &self.0 } pub fn num_bytes(&self) -> usize { self.0.len() } #[allow(dead_code)] pub fn cap(&self) -> usize { self.0.len() } pub fn make_owned(&self) -> Compressed<'static> { Compressed(Cow::from(self.0.clone().into_owned())) } } pub struct Compressor u64> { compressor: prefix_varint::U64Compressor, buffer: Option, num_compressed: u64, } // TODO add capacity pub fn compressor() -> Compressor u64> { Compressor { compressor: prefix_varint::U64Compressor::with(delta::u64_encoder()), buffer: None, num_compressed: 0, } } impl u64> Compressor { pub fn is_empty(&self) -> bool { self.buffer.is_none() && self.compressor.is_empty() } pub fn last_mut(&mut self) -> Option<&mut Encoded> { 
self.buffer.as_mut() } pub fn push(&mut self, value: Encoded) { if let Some(val) = self.buffer.take() { self.compress_value(val) } self.buffer = Some(value) } pub fn into_compressed(mut self) -> (Compressed<'static>, u64) { if let Some(val) = self.buffer.take() { self.compress_value(val) } ( Compressed(self.compressor.finish().into()), self.num_compressed, ) } fn compress_value(&mut self, Encoded(value): Encoded) { self.num_compressed += 1; self.compressor.push(value.into()); } } impl u64> Extend for Compressor { fn extend>(&mut self, iter: T) { for e in iter { self.push(e) } } } #[cfg(test)] mod test { use super::*; #[quickcheck] fn quick_test_roundtrip(values: Vec) -> bool { let mut compressor = compressor(); for val in &values { compressor.push(Encoded(*val)); } let (blocks, count) = compressor.into_compressed(); let decompressed = decompression_iter(&blocks); let expected_len = values.len(); let mut actual_len = 0; for (i, (a, b)) in values.iter().zip(decompressed).enumerate() { if *a != b.0 { println!("value mismatch @ {}, expected {}, got {}", i, a, b.0,); return false; } actual_len += 1 } if expected_len != actual_len { println!( "iter len mismatch, expected {}, got {}", expected_len, actual_len, ); return false; } if expected_len as u64 != count { println!( "compression count mismatch, expected {}, got {}", expected_len, count, ); return false; } true } } ================================================ FILE: crates/hyperloglogplusplus/src/sparse.rs ================================================ use std::{ cmp::{ min, Ordering::{Equal, Greater, Less}, }, collections::HashSet, }; use crate::{dense, Extractable}; use self::varint::*; mod varint; #[derive(serde::Serialize, serde::Deserialize, Clone, PartialEq, Eq)] pub struct Storage<'s> { to_merge: HashSet, pub compressed: Compressed<'s>, pub num_compressed: u64, pub precision: u8, } #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[repr(transparent)] pub struct 
Encoded(u32); const NUM_HIGH_BITS: u8 = 25; pub type Overflowing = bool; impl<'s> Storage<'s> { pub fn new(precision: u8) -> Self { // TODO what is max precision assert!( (4..=18).contains(&precision), "invalid value for precision: {precision}; must be within [4, 18]", ); Self { to_merge: Default::default(), compressed: Default::default(), num_compressed: 0, precision, } } pub fn from_parts(bytes: &'s [u8], num_compressed: u64, precision: u8) -> Self { // TODO what is max precision assert!( (4..=18).contains(&precision), "invalid value for precision: {precision}; must be within [4, 18]", ); Self { to_merge: Default::default(), compressed: Compressed::from_raw(bytes), num_compressed, precision, } } pub fn into_owned(&self) -> Storage<'static> { Storage { to_merge: self.to_merge.clone(), compressed: self.compressed.make_owned(), num_compressed: self.num_compressed, precision: self.precision, } } pub fn add_hash(&mut self, hash: u64) -> Overflowing { let encoded = Encoded::from_hash(hash, self.precision); self.add_encoded(encoded) } fn add_encoded(&mut self, encoded: Encoded) -> Overflowing { self.to_merge.insert(encoded); let max_sparse_bitsize = (1u64 << self.precision) * 6; // TODO what threshold? 
if self.to_merge.len() as u64 * 32 > max_sparse_bitsize / 4 { self.merge_buffers(); return self.compressed.num_bytes() as u64 * 8 > max_sparse_bitsize; } false } pub fn estimate_count(&mut self) -> u64 { self.merge_buffers(); self.immutable_estimate_count() } pub fn immutable_estimate_count(&self) -> u64 { if !self.to_merge.is_empty() { panic!("tried to estimate count with unmerged state") } let m_p = 1 << NUM_HIGH_BITS; let v = (m_p - self.num_compressed) as f64; let m_p = m_p as f64; (m_p * (m_p / v).ln()) as u64 } pub fn merge_buffers(&mut self) { if self.to_merge.is_empty() { return; } let mut temp: Vec<_> = self.to_merge.drain().collect(); temp.sort_unstable(); temp.dedup_by_key(|e| e.idx()); // TODO set original cap to self.compressed.cap() let mut new_compressed = compressor(); let mut a = decompression_iter(&self.compressed).peekable(); let mut b = temp.into_iter().fuse().peekable(); let mut merge_in = |to_merge_in| { if new_compressed.is_empty() { new_compressed.push(to_merge_in); return; } let prev = new_compressed.last_mut().unwrap(); if prev.idx() != to_merge_in.idx() { new_compressed.push(to_merge_in); return; } if prev.count(NUM_HIGH_BITS) < to_merge_in.count(NUM_HIGH_BITS) { *prev = to_merge_in; } }; while let (Some(val_a), Some(val_b)) = (a.peek(), b.peek()) { let (idx_a, idx_b) = (val_a.idx(), val_b.idx()); let to_merge_in = match idx_a.cmp(&idx_b) { Less => a.next().unwrap(), Greater => b.next().unwrap(), Equal => { let (a, b) = (a.next().unwrap(), b.next().unwrap()); min(a, b) } }; merge_in(to_merge_in); } a.for_each(&mut merge_in); b.for_each(merge_in); let (compressed, count) = new_compressed.into_compressed(); self.compressed = compressed; self.num_compressed = count; } fn iter(&self) -> impl Iterator + '_ { decompression_iter(&self.compressed) } pub fn to_dense(&mut self) -> dense::Storage<'static> { self.merge_buffers(); self.immutable_to_dense() } pub fn immutable_to_dense(&self) -> dense::Storage<'static> { if !self.to_merge.is_empty() { 
panic!("tried to generate dense storage with unmerged state") } let mut dense = dense::Storage::new(self.precision); for encoded in self.iter() { dense.add_encoded(encoded) } dense } pub fn num_bytes(&self) -> usize { self.compressed.num_bytes() } pub fn merge_in(&mut self, other: &Storage<'_>) -> Overflowing { assert!( self.precision == other.precision, "precision must be equal (left={}, right={})", self.precision, other.precision ); assert!(other.to_merge.is_empty()); let mut overflowing = false; for encoded in other.iter() { overflowing = self.add_encoded(encoded) } overflowing } } impl Encoded { pub(crate) fn from_hash(hash: u64, precision: u8) -> Self { // Encoded form // // | idx | count | tag | // | 25 | 6* | 1 | // // *`count` is only present when `tag` is `1` let idx = hash.extract(63, NUM_HIGH_BITS) as u32; let diff = hash.extract_bits(63 - precision, 64 - NUM_HIGH_BITS); if diff == 0 { // TODO is this right? let count = hash.extract_bits(63 - NUM_HIGH_BITS, 0).q() as u32 - NUM_HIGH_BITS as u32; Encoded((idx << 7) | (count << 1) | 1) } else { Encoded(idx << 1) } } pub fn idx(&self) -> u32 { if self.stores_count() { self.0 >> 7 } else { self.0 >> 1 } } pub fn count(&self, p: u8) -> u8 { if self.stores_count() { let extra_bits = NUM_HIGH_BITS - p; self.extract_count() + extra_bits } else { let new_hash = (self.idx() as u64) << (64 - NUM_HIGH_BITS); let hash_bits = new_hash.extract_bits(63 - p, 0); hash_bits.q() - p } } #[inline] fn stores_count(&self) -> bool { self.0 & 1 == 1 } #[inline] fn extract_count(&self) -> u8 { self.0.extract_bits(6, 1) as u8 } } impl PartialOrd for Encoded { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } // The canonical ordering is by ascending index, then descending count. // This allows us to deduplicate by index after sorting. 
impl Ord for Encoded { fn cmp(&self, other: &Self) -> std::cmp::Ordering { let idx_cmp = self.idx().cmp(&other.idx()); if let Equal = idx_cmp { return match (self.stores_count(), other.stores_count()) { (false, false) => Equal, (true, false) => Less, (false, true) => Greater, (true, true) => self.extract_count().cmp(&other.extract_count()).reverse(), }; } idx_cmp } } #[cfg(test)] mod tests { use fnv::FnvHasher; use quickcheck::TestResult; use super::*; use std::hash::{Hash, Hasher}; const NUM_HASH_BITS: u8 = 64 - NUM_HIGH_BITS; pub fn hash(val: i32) -> u64 { let mut hasher = FnvHasher::default(); val.hash(&mut hasher); hasher.finish() } #[test] fn test_asc_10k() { let mut hll = Storage::new(16); for i in 0..10_000 { hll.add_hash(hash(i)); } assert_eq!(hll.estimate_count(), 10_001) } #[test] fn test_asc_100k() { let mut hll = Storage::new(16); for i in 0..100_000 { hll.add_hash(hash(i)); } assert_eq!(hll.estimate_count(), 100_149); assert_eq!(hll.compressed.num_bytes(), 184_315); } #[test] fn test_asc_500k() { let mut hll = Storage::new(16); for i in 0..500_000 { hll.add_hash(hash(i)); } assert_eq!(hll.estimate_count(), 471_229); assert_eq!(hll.compressed.num_bytes(), 690_301); } #[quickcheck] fn quick_sparse(values: Vec) -> TestResult { if values.len() >= (1 << NUM_HASH_BITS) { return TestResult::discard(); } let mut hll = Storage::new(16); let expected = values.iter().collect::>().len() as f64; for value in values { hll.add_hash(value); } let estimated = hll.estimate_count() as f64; let error = 0.001 * expected; if expected - error <= estimated && estimated <= expected + error { return TestResult::passed(); } if estimated <= expected + 10.0 && estimated >= expected - 10.0 { return TestResult::passed(); } println!("got {}, expected {} +- {}", estimated, expected, error); TestResult::failed() } #[quickcheck] fn quick_sparse_as_set(values: Vec) -> TestResult { if values.len() >= (1 << NUM_HASH_BITS) { return TestResult::discard(); } let mut hll = Storage::new(16); 
for value in &values { hll.add_hash(*value); } hll.merge_buffers(); let mut expected: Vec<_> = values .into_iter() .map(|h| Encoded::from_hash(h, 16)) .collect(); expected.sort_unstable(); // println!("pre_sort {:?}", temp); expected.dedup_by_key(|e| e.idx()); let expected_len = expected.len(); let mut actual_len = 0; for (i, (a, b)) in expected.iter().zip(hll.iter()).enumerate() { if *a != b { println!("value mismatch @ {}, expected {}, got {}", i, a.0, b.0,); return TestResult::failed(); } actual_len += 1 } if expected_len != actual_len { println!( "iter len mismatch, expected {}, got {}", expected_len, actual_len, ); return TestResult::failed(); } TestResult::passed() } #[quickcheck] fn quick_sparse_merge_invariant(values: Vec) -> TestResult { if values.len() >= (1 << NUM_HASH_BITS) { return TestResult::discard(); } let mut hlla = Storage::new(16); let mut hllb = Storage::new(16); for value in &values { hlla.add_hash(*value); hllb.add_hash(*value); hllb.merge_buffers() } hlla.merge_buffers(); for (i, (a, b)) in hlla.iter().zip(hllb.iter()).enumerate() { if a != b { println!("value mismatch @ {}, expected {}, got {}", i, a.0, b.0,); return TestResult::failed(); } } let expected_len = hlla.iter().count(); let actual_len = hllb.iter().count(); if expected_len != actual_len { println!( "iter len mismatch, expected {}, got {}", expected_len, actual_len, ); return TestResult::failed(); } TestResult::passed() } // fn encoded_order() { // } #[test] fn sparse_merge_01() { let mut hlla = Storage::new(16); let mut hllb = Storage::new(16); let values = [0, 1]; for value in &values { hlla.add_hash(*value); } hlla.merge_buffers(); for value in &values { hllb.add_hash(*value); hllb.merge_buffers() } let a: Vec<_> = hlla.iter().collect(); let b: Vec<_> = hllb.iter().collect(); assert_eq!(a, b) } } ================================================ FILE: crates/scripting-utilities/Readme.md ================================================ # Scripting Utilities # Small helper 
crates for writing scripty code, such as found in tools. Contains code that's _just_ complicated or irritating enough that it's worth deduplicating instead of copy/pasting, but still simple enough to be appropriate for scripty code. We care about compile times for this code, so in general try to keep the crates small, simple, and easy to understand. In accordance with this, this dir contains a bunch of micro crates instead of one medium utility crate in hopes that this will keep them more focused, prevent them from metastasizing, and take advantage of compiler parallelism. ================================================ FILE: crates/scripting-utilities/control_file_reader/Cargo.toml ================================================ [package] name = "control_file_reader" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] ================================================ FILE: crates/scripting-utilities/control_file_reader/src/lib.rs ================================================ /// Code to extract info from `timescaledb_toolkit.control` /// This crate exists so we have a single source of truth for the format.
use std::fmt;

/// Result alias for control-file parsing; the error type is always [`Error`].
pub type Result<T> = std::result::Result<T, Error>;

/// extract the current version from the control file
pub fn get_current_version(control_file: &str) -> Result<String> {
    get_field_val(control_file, "version").map(|v| v.to_string())
}

/// extract the list of versions we're upgradeable-from from the control file
pub fn get_upgradeable_from(control_file: &str) -> Result<Vec<String>> {
    // versions is a comma-delimited list of versions
    let versions = get_field_val(control_file, "upgradeable_from")?;
    let versions = versions
        .split_terminator(',')
        .map(|version| version.trim().to_string())
        .collect();
    Ok(versions)
}

/// find a `<field_name> = '<value>'` in `file` and extract `<value>`.
/// A commented-out `# <field_name> = '<value>'` line also matches; the first
/// matching line wins.
pub fn get_field_val<'a>(file: &'a str, field_name: &str) -> Result<&'a str> {
    // Build the commented-prefix pattern once; the previous version allocated
    // this string anew for every line scanned.
    let commented = format!("# {field_name}");
    file.lines()
        .find(|line| line.starts_with(field_name) || line.starts_with(&commented))
        .ok_or(Error::FieldNotFound)
        .and_then(get_quoted_field)
}

/// given a `<field> = '<value>'` extract `<value>`
pub fn get_quoted_field(line: &str) -> Result<&str> {
    // everything after the first `=`
    let quoted = line.split('=').nth(1).ok_or(Error::NoValue)?;
    // first non-empty run between single quotes; a missing value (or only
    // quotes) yields UnquotedValue
    quoted
        .trim_start()
        .split_terminator('\'')
        .find(|s| !s.is_empty())
        .ok_or(Error::UnquotedValue)
}

/// Parse failures for control-file fields.
pub enum Error {
    FieldNotFound,
    NoValue,
    UnquotedValue,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self {
            Self::FieldNotFound => write!(f, "cannot read field"),
            Self::NoValue => write!(f, "cannot find value"),
            Self::UnquotedValue => write!(f, "unquoted value"),
        }
    }
}

impl fmt::Debug for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

// Lets callers propagate with `?` into `Box<dyn std::error::Error>`.
impl std::error::Error for Error {}
/// Config utility for connecting to multiple DBs in the same cluster
// JOSH - I'm not sure if this really warrants a crate, but it seems like if we
// ever change this it'll be annoying to hunt down everything ¯\_(ツ)_/¯
#[derive(Copy, Clone)]
pub struct ConnectionConfig<'s> {
    pub host: Option<&'s str>,
    pub port: Option<&'s str>,
    pub user: Option<&'s str>,
    pub password: Option<&'s str>,
    pub database: Option<&'s str>,
}

impl<'s> ConnectionConfig<'s> {
    /// Copy of this config, pointed at `database` instead of the current DB.
    pub fn with_db<'d>(&self, database: &'d str) -> ConnectionConfig<'d>
    where
        's: 'd,
    {
        ConnectionConfig {
            database: Some(database),
            ..*self
        }
    }

    /// get a config string we can use to connect to the db
    ///
    /// Fields are rendered in host/port/user/password/dbname order; `user`
    /// falls back to `postgres` when unset. Every rendered pair is followed
    /// by a single space (including the last one).
    pub fn config_string(&self) -> String {
        use std::fmt::Write;
        let mut rendered = String::new();
        // Shared formatter for the `key=value ` pairs; `write!` into a
        // String cannot fail, so the Result is ignored.
        let mut emit = |key: &str, value: &str| {
            let _ = write!(&mut rendered, "{key}={value} ");
        };
        if let Some(host) = self.host {
            emit("host", host);
        }
        if let Some(port) = self.port {
            emit("port", port);
        }
        emit("user", self.user.unwrap_or("postgres"));
        if let Some(password) = self.password {
            emit("password", password);
        }
        if let Some(database) = self.database {
            emit("dbname", database);
        }
        rendered
    }
}
version = "0.8.4", features = ["serde"] } num-traits = "0.2.15" [dev-dependencies] approx = "0.5.1" ================================================ FILE: crates/stats-agg/src/lib.rs ================================================ // stats is a small statistical regression lib that implements the Youngs-Cramer algorithm and is based on the Postgres implementation // here for 1D regression analysis: // And here for 2D regression analysis: // https://github.com/postgres/postgres/blob/472e518a44eacd9caac7d618f1b6451672ca4481/src/backend/utils/adt/float.c#L3260 // pub use twofloat::TwoFloat; pub trait FloatLike: num_traits::NumOps + num_traits::NumAssignOps + num_traits::Float + From { /// Shorthand for `>::from(val)` fn lit(val: f64) -> Self { >::from(val) } fn from_u64(n: u64) -> Self; } impl FloatLike for f64 { fn from_u64(n: u64) -> Self { n as f64 } } impl FloatLike for TwoFloat { fn from_u64(n: u64) -> Self { (n as f64).into() } } #[derive(Debug, PartialEq, Eq)] pub enum StatsError { DoubleOverflow, } #[derive(Debug, PartialEq, Eq)] pub struct XYPair { pub x: T, pub y: T, } // The threshold at which we should re-calculate when we're doing the inverse transition in a windowed aggregate // essentially, if we're shifting the data by enough as we remove a value from the aggregate we can end up with // extra floating point error because in real arithmetic x = x + C - C // but in floating point arithmetic, if C is large compared to x, we can accumulate significant error. // In our case, because C is added in the normal transition or combine function, and then removed later in the // inverse function, we have x + C and C and we are testing the following: C / (x + C) > INV_FLOATING_ERROR_THRESHOLD // Because of the way that Postgres performs inverse functions, if we return a NULL value, the only thing that happens // is that the partial will get re-calculated from scratch from the values in the window function. 
So providing // the inverse function is purely an optimization. There are several cases where the C/(x + C) is likely to be larger // than our threshold, but we don't care too much, namely when there are one or two values this can happen frequently, // but then the cost of recalculation is low, compared to when there are many values in a rolling calculation, so we // test early in the function for whether we need to recalculate and pass NULL quickly so that we don't affect those // cases too heavily. #[cfg(not(test))] const INV_FLOATING_ERROR_THRESHOLD: f64 = 0.99; #[cfg(test)] // don't have a threshold for tests, to ensure the inverse function is better tested const INV_FLOATING_ERROR_THRESHOLD: f64 = f64::INFINITY; pub mod stats1d; pub mod stats2d; // This will wrap the logic for incrementing the sum for the third moment of a series of floats (i.e. Sum (i=1..N) of (i-avg)^3) // Math is sourced from https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics mod m3 { use super::*; // Add a value x to the set. n, sx, sxx, sx3 are the values from prior to including x. pub(crate) fn accum(n: T, sx: T, sxx: T, sx3: T, x: T) -> T { let delta = x - (sx / n); let n = n + T::one(); sx3 + delta.powi(3) * (n - T::one()) * (n - T::lit(2.)) / n.powi(2) - (T::lit(3.) * delta * sxx / n) } // Remove a value x from the set. Here n, sx, sxx are all the values from the set after x has been removed. // old_sx3 is the current value prior to the remove (sx3 after the removal is the returned value) pub(crate) fn remove(new_n: T, new_sx: T, new_sxx: T, old_sx3: T, x: T) -> T { let delta = x - (new_sx / new_n); let n = new_n + T::one(); old_sx3 - (delta.powi(3) * (n - T::one()) * (n - T::lit(2.)) / n.powi(2) - (T::lit(3.) * delta * new_sxx / n)) } // Combine two sets a and b and returns the sx3 for the combined set. 
#[allow(clippy::too_many_arguments)] pub(crate) fn combine( na: T, nb: T, sxa: T, sxb: T, sxxa: T, sxxb: T, sx3a: T, sx3b: T, ) -> T { let nx = na + nb; let delta = sxb / nb - sxa / na; sx3a + sx3b + delta.powi(3) * na * nb * (na - nb) / nx.powi(2) + (na * sxxb - (nb * sxxa)) * T::lit(3.) * delta / nx } // This removes set b from a combined set, returning the sx3 of the remaining set a. // Note that na, sxa, sxxa are all the values computed on the remaining set. old_sx3 is the sx3 of the combined set. #[allow(clippy::too_many_arguments)] pub(crate) fn remove_combined( new_na: T, nb: T, new_sxa: T, sxb: T, new_sxxa: T, sxxb: T, old_sx3: T, sx3b: T, ) -> T { let nx = new_na + nb; let delta = sxb / nb - new_sxa / new_na; old_sx3 - (sx3b + delta.powi(3) * new_na * nb * (new_na - nb) / nx.powi(2) + T::lit(3.) * (new_na * sxxb - (nb * new_sxxa)) * delta / nx) } } // This will wrap the logic for incrementing the sum for the fourth moment of a series of floats (i.e. Sum (i=1..N) of (i-avg)^4) // Math is sourced from https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics mod m4 { use super::*; // Add a value x to the set. n, sx, sxx, sx3, sx4 are the values from prior to including x. pub(crate) fn accum(n: T, sx: T, sxx: T, sx3: T, sx4: T, x: T) -> T { let delta = x - (sx / n); let n = n + T::one(); sx4 + delta.powi(4) * (n - T::one()) * (n.powi(2) - T::lit(3.) * n + T::lit(3.)) / n.powi(3) + T::lit(6.) * delta.powi(2) * sxx / n.powi(2) - T::lit(4.) * delta * sx3 / n } // Remove a value x from the set. Here n, sx, sxx, sx3 are all the values from the set after x has been removed. // old_sx4 is the current value prior to the remove (sx4 after the removal is the returned value) pub(crate) fn remove( new_n: T, new_sx: T, new_sxx: T, new_sx3: T, old_sx4: T, x: T, ) -> T { let delta = x - (new_sx / new_n); let n = new_n + T::one(); old_sx4 - (delta.powi(4) * (n - T::one()) * (n.powi(2) - T::lit(3.) * n + T::lit(3.)) / n.powi(3) + T::lit(6.) 
* delta.powi(2) * new_sxx / n.powi(2) - T::lit(4.) * delta * new_sx3 / n) } // Combine two sets a and b and returns the sx4 for the combined set. #[allow(clippy::too_many_arguments)] pub(crate) fn combine( na: T, nb: T, sxa: T, sxb: T, sxxa: T, sxxb: T, sx3a: T, sx3b: T, sx4a: T, sx4b: T, ) -> T { let nx = na + nb; let delta = sxb / nb - sxa / na; sx4a + sx4b + delta.powi(4) * na * nb * (na.powi(2) - na * nb + nb.powi(2)) / nx.powi(3) + T::lit(6.) * (na.powi(2) * sxxb + nb.powi(2) * sxxa) * delta.powi(2) / nx.powi(2) + T::lit(4.) * (na * sx3b - nb * sx3a) * delta / nx } // This removes set b from a combined set, returning the sx4 of the remaining set a. // Note that na, sxa, sxxa, sx3a are all the values computed on the remaining set. old_sx4 is the sx4 of the combined set. #[allow(clippy::too_many_arguments)] pub(crate) fn remove_combined( new_na: T, nb: T, new_sxa: T, sxb: T, new_sxxa: T, sxxb: T, new_sx3a: T, sx3b: T, old_sx4: T, sx4b: T, ) -> T { let nx = new_na + nb; let delta = sxb / nb - new_sxa / new_na; old_sx4 - (sx4b + delta.powi(4) * new_na * nb * (new_na.powi(2) - new_na * nb + nb.powi(2)) / nx.powi(3) + T::lit(6.) * (new_na.powi(2) * sxxb + nb.powi(2) * new_sxxa) * delta.powi(2) / nx.powi(2) + T::lit(4.) 
* (new_na * sx3b - nb * new_sx3a) * delta / nx) } } #[cfg(test)] mod tests { use super::*; use twofloat::TwoFloat; #[test] fn floatlike_lit() { assert_eq!(f64::lit(3.), 3.); assert_eq!(TwoFloat::lit(3.), TwoFloat::new_add(3., 0.)); } } ================================================ FILE: crates/stats-agg/src/stats1d.rs ================================================ use crate::{m3, m4, FloatLike, StatsError, TwoFloat, INV_FLOATING_ERROR_THRESHOLD}; use serde::{Deserialize, Serialize}; #[derive(Debug, PartialEq, Eq, Copy, Clone, Serialize, Deserialize)] #[repr(C)] pub struct StatsSummary1D { pub n: u64, pub sx: T, pub sx2: T, pub sx3: T, pub sx4: T, } impl Default for StatsSummary1D where T: FloatLike, { fn default() -> Self { Self::new() } } // can't make this impl generic without conflicting with the stdlib implementation of From for T impl From> for StatsSummary1D { fn from(input_summary: StatsSummary1D) -> Self { StatsSummary1D { n: input_summary.n, sx: input_summary.sx.into(), sx2: input_summary.sx2.into(), sx3: input_summary.sx3.into(), sx4: input_summary.sx4.into(), } } } pub fn convert_tf_to_f64(tf: TwoFloat) -> f64 { tf.hi() + tf.lo() } impl From> for StatsSummary1D { fn from(input_summary: StatsSummary1D) -> Self { StatsSummary1D { n: input_summary.n, sx: input_summary.sx.into(), sx2: input_summary.sx2.into(), sx3: input_summary.sx3.into(), sx4: input_summary.sx4.into(), } } } impl StatsSummary1D where T: FloatLike, { fn n64(&self) -> T { T::from_u64(self.n) } pub fn new() -> Self { StatsSummary1D { n: 0, sx: T::zero(), sx2: T::zero(), sx3: T::zero(), sx4: T::zero(), } } // we use the Youngs-Cramer method for accumulating the values here to allow for easy computation of variance etc in a numerically robust way. 
// for this part, we've essentially copied the Postgres implementation found:
// https://github.com/postgres/postgres/blob/8bdd6f563aa2456de602e78991e6a9f61b8ec86d/src/backend/utils/adt/float.c#L2813
// Note that the Youngs-Cramer method relies on the sum((x - Sx/n)^2) for which they derive a recurrence relation which is reflected in the algorithm here:
// the recurrence relation is: sum((x - Sx/n)^2) = Sxx = Sxx_n-1 + 1/(n(n-1)) * (nx - Sx)^2
/// Fold one value into the running sums; errors only when finite inputs
/// overflow to infinity.
pub fn accum(&mut self, p: T) -> Result<(), StatsError> {
    let old = *self;
    self.n += 1;
    self.sx += p;
    if old.n > 0 {
        // Youngs-Cramer recurrence for the second moment.
        let tmpx = p * self.n64() - self.sx;
        let scale = T::one() / (self.n64() * old.n64());
        self.sx2 += tmpx * tmpx * scale;
        // Third and fourth moments maintained by the helper modules; note
        // they intentionally take the *old* sums.
        self.sx3 = m3::accum(old.n64(), old.sx, old.sx2, old.sx3, p);
        self.sx4 = m4::accum(old.n64(), old.sx, old.sx2, old.sx3, old.sx4, p);
        if self.has_infinite() {
            if self.check_overflow(&old, p) {
                return Err(StatsError::DoubleOverflow);
            }
            // sxx should be set to NaN if any of its inputs are
            // infinite, so if they ended up as infinite and there wasn't an overflow,
            // we need to set them to NaN instead as this implies that there was an
            // infinite input (because they necessarily involve multiplications of
            // infinites, which are NaNs)
            if self.sx2.is_infinite() {
                self.sx2 = T::nan();
            }
            if self.sx3.is_infinite() {
                self.sx3 = T::nan();
            }
            if self.sx4.is_infinite() {
                self.sx4 = T::nan();
            }
        }
    } else {
        // first input, leave sxx alone unless we have infinite inputs
        if !p.is_finite() {
            self.sx2 = T::nan();
            self.sx3 = T::nan();
            self.sx4 = T::nan();
        }
    }
    Result::Ok(())
}

// True when any running sum has overflowed to +/- infinity.
fn has_infinite(&self) -> bool {
    self.sx.is_infinite()
        || self.sx2.is_infinite()
        || self.sx3.is_infinite()
        || self.sx4.is_infinite()
}

fn check_overflow(&self, old: &Self, p: T) -> bool {
    //Only report overflow if we have finite inputs that lead to infinite results.
    self.has_infinite() && old.sx.is_finite() && p.is_finite()
}

// inverse transition function (inverse of accum) for windowed aggregates, return None if we want to re-calculate from scratch
// we won't modify in place here because of that return bit, it might be that we want to modify accum to also
// copy just for symmetry.
// Assumption: no need for Result/error possibility because we can't overflow, as we are doing an inverse operation of something that already happened, so if it worked forward, it should work in reverse?
// We're extending the Youngs Cramer algorithm here with the algebraic transformation to figure out the reverse calculations.
// This goes beyond what the PG code does, and is our extension for performance in windowed calculations.
// There is a case where the numerical error can get very large that we will try to avoid: if we have an outlier value that is much larger than the surrounding values
// we can get something like: v1 + v2 + v3 + ... vn = outlier + v1 + v2 + v3 + ... + vn - outlier when the outlier is removed from the window. This will cause significant error in the
// resulting calculation of v1 + ... + vn, more than we're comfortable with, so we'll return None in that case which will force recalculation from scratch of v1 + ... + vn.
// Algebra for removal:
// n = n_old + 1 -> n_old = n - 1
// Sx = Sx_old + x -> Sx_old = Sx - x
// sum((x - Sx/n)^2) = Sxx = Sxx_old + 1/(n * n_old) * (nx - Sx)^2 -> Sxx_old = Sxx - 1/(n * n_old) * (nx - Sx)^2
// NOTE(review): return type appears extraction-garbled; presumably
// `Option<Self>` — confirm against the repo.
pub fn remove(&self, p: T) -> Option {
    // if we are trying to remove a nan/infinite input, it's time to recalculate.
    if p.is_nan() || p.is_infinite() {
        return None;
    }
    // if we are removing a value that is very large compared to the sum of the values that we're removing it from,
    // we should probably recalculate to avoid accumulating error. We might want a different test for this, if there
    // is a way to calculate the error directly, that might be best...
    // NOTE(review): the cast expression appears extraction-garbled;
    // presumably `<T as From<f64>>::from(...)` — confirm against the repo.
    if p / self.sx > >::from(INV_FLOATING_ERROR_THRESHOLD) {
        return None;
    }
    // we can't have an initial value of n = 0 if we're removing something...
    if self.n == 0 {
        panic!(); //perhaps we should do error handling here, but I think this is reasonable as we are assuming that the removal is of an already-added item in the rest of this
    }
    if self.n == 1 {
        return Some(StatsSummary1D::new());
    }
    let mut new = StatsSummary1D {
        n: self.n - 1,
        sx: self.sx - p,
        sx2: T::zero(), // initialize this for now.
        sx3: T::zero(), // initialize this for now.
        sx4: T::zero(), // initialize this for now.
    };
    // Inverse of the accum recurrence (see algebra above).
    let tmpx = p * self.n64() - self.sx;
    let scale = (self.n64() * new.n64()).recip();
    new.sx2 = self.sx2 - tmpx * tmpx * scale;
    new.sx3 = m3::remove(new.n64(), new.sx, new.sx2, self.sx3, p);
    new.sx4 = m4::remove(new.n64(), new.sx, new.sx2, new.sx3, self.sx4, p);
    Some(new)
}

// convenience function for creating an aggregate from a vector, currently used mostly for testing.
// NOTE(review): signature appears extraction-garbled; presumably
// `(v: Vec<T>) -> Result<Self, StatsError>` — confirm against the repo.
pub fn new_from_vec(v: Vec) -> Result {
    let mut r = StatsSummary1D::new();
    for p in v {
        r.accum(p)?;
    }
    Result::Ok(r)
}

/// Merge another summary into this one (partial-aggregate combine).
pub fn combine(&self, other: Self) -> Result {
    // TODO: think about whether we want to just modify &self in place here for perf
    // reasons. This is also a set of weird questions around the Rust compiler, so
    // easier to just add the copy trait here, may need to adjust or may make things
    // harder if we do generics.
    // handle the trivial n = 0 cases here, and don't worry about divide by zero errors later.
    if self.n == 0 && other.n == 0 {
        return Ok(StatsSummary1D::new());
    } else if self.n == 0 {
        return Ok(other);
    } else if other.n == 0 {
        return Ok(*self);
    }
    let tmp = self.sx / self.n64() - other.sx / other.n64();
    let n = self.n + other.n;
    let r = StatsSummary1D {
        n,
        sx: self.sx + other.sx,
        // NOTE(review): the divisor cast appears extraction-garbled —
        // confirm the original conversion-from-u64 form against the repo.
        sx2: self.sx2 + other.sx2 + self.n64() * other.n64() * tmp * tmp / ::from(n).unwrap(),
        sx3: m3::combine(
            self.n64(),
            other.n64(),
            self.sx,
            other.sx,
            self.sx2,
            other.sx2,
            self.sx3,
            other.sx3,
        ),
        sx4: m4::combine(
            self.n64(),
            other.n64(),
            self.sx,
            other.sx,
            self.sx2,
            other.sx2,
            self.sx3,
            other.sx3,
            self.sx4,
            other.sx4,
        ),
    };
    if r.has_infinite() && !self.has_infinite() && !other.has_infinite() {
        return Err(StatsError::DoubleOverflow);
    }
    Ok(r)
}

// This is the inverse combine function for use in the window function context when we want to reverse the operation of the normal combine function
// for re-aggregation over a window, this is what will get called in tumbling window averages for instance.
// As with any window function, returning None will cause a re-calculation, so we do that in several cases where either we're dealing with infinites or we have some potential problems with outlying sums
// so here, self is the previously combined StatsSummary, and we're removing the input and returning the part that would have been there before.
pub fn remove_combined(&self, remove: Self) -> Option {
    let combined = &self; // just to lessen confusion with naming
    // handle the trivial n = 0 and equal n cases here, and don't worry about divide by zero errors later.
    if combined.n == remove.n {
        return Some(StatsSummary1D::new());
    } else if remove.n == 0 {
        return Some(*self);
    } else if combined.n < remove.n {
        panic!(); // given that we're always removing things that we've previously added, we shouldn't be able to get a case where we're removing an n that's larger.
    }
    // if the sum we're removing is very large compared to the overall value we need to recalculate, see note on the remove function
    if remove.sx / combined.sx > >::from(INV_FLOATING_ERROR_THRESHOLD) {
        return None;
    }
    let mut part = StatsSummary1D {
        n: combined.n - remove.n,
        sx: combined.sx - remove.sx,
        sx2: T::zero(), //just initialize this, for now.
        sx3: T::zero(), //just initialize this, for now.
        sx4: T::zero(), //just initialize this, for now.
    };
    let tmp = part.sx / part.n64() - remove.sx / remove.n64(); //gets squared so order doesn't matter
    part.sx2 =
        combined.sx2 - remove.sx2 - part.n64() * remove.n64() * tmp * tmp / combined.n64();
    part.sx3 = m3::remove_combined(
        part.n64(),
        remove.n64(),
        part.sx,
        remove.sx,
        part.sx2,
        remove.sx2,
        self.sx3,
        remove.sx3,
    );
    part.sx4 = m4::remove_combined(
        part.n64(),
        remove.n64(),
        part.sx,
        remove.sx,
        part.sx2,
        remove.sx2,
        part.sx3,
        remove.sx3,
        self.sx4,
        remove.sx4,
    );
    Some(part)
}

// Accessors below return None when the statistic is undefined for the
// current n (empty summary, or n = 1 for the sample variants).
// NOTE(review): the bare `Option` return types here appear
// extraction-garbled; presumably `Option<T>` — confirm against the repo.
pub fn avg(&self) -> Option {
    if self.n == 0 {
        return None;
    }
    Some(self.sx / self.n64())
}
pub fn count(&self) -> i64 {
    self.n as i64
}
pub fn sum(&self) -> Option {
    if self.n == 0 {
        return None;
    }
    Some(self.sx)
}
pub fn var_pop(&self) -> Option {
    if self.n == 0 {
        return None;
    }
    Some(self.sx2 / self.n64())
}
pub fn var_samp(&self) -> Option {
    if self.n == 0 {
        return None;
    }
    Some(self.sx2 / (self.n64() - T::one()))
}
pub fn stddev_pop(&self) -> Option {
    Some(self.var_pop()?.sqrt())
}
pub fn stddev_samp(&self) -> Option {
    Some(self.var_samp()?.sqrt())
}
pub fn skewness_pop(&self) -> Option {
    Some(self.sx3 / self.n64() / self.stddev_pop()?.powi(3))
}
pub fn skewness_samp(&self) -> Option {
    Some(self.sx3 / (self.n64() - T::one()) / self.stddev_samp()?.powi(3))
}
pub fn kurtosis_pop(&self) -> Option {
    Some(self.sx4 / self.n64() / self.stddev_pop()?.powi(4))
}
pub fn kurtosis_samp(&self) -> Option {
    Some(self.sx4 / (self.n64() - T::one()) / self.stddev_samp()?.powi(4))
}
}
TwoFloat { TwoFloat::new_add(f, 0.0) } #[track_caller] fn assert_close_enough(s1: &StatsSummary1D, s2: &StatsSummary1D) { assert_eq!(s1.n, s2.n); assert_relative_eq!(s1.sx, s2.sx); assert_relative_eq!(s1.sx2, s2.sx2); assert_relative_eq!(s1.sx3, s2.sx3); assert_relative_eq!(s1.sx4, s2.sx4); } #[track_caller] fn assert_close_enough_tf(s1: &StatsSummary1D, s2: &StatsSummary1D) { assert_eq!(s1.n, s2.n); assert!((s1.sx - s2.sx).abs() < 10.0 * f64::EPSILON); assert!((s1.sx2 - s2.sx2).abs() < 10.0 * f64::EPSILON); assert!((s1.sx3 - s2.sx3).abs() < 10.0 * f64::EPSILON); assert!((s1.sx4 - s2.sx4).abs() < 10.0 * f64::EPSILON); } #[test] fn test_against_known_vals() { let p = StatsSummary1D::new_from_vec(vec![7.0, 18.0, -2.0, 5.0, 3.0]).unwrap(); assert_eq!(p.n, 5); assert_relative_eq!(p.sx, 31.); assert_relative_eq!(p.sx2, 218.8); assert_relative_eq!(p.sx3, 1057.68); assert_relative_eq!(p.sx4, 24016.336); let p = p.remove(18.0).unwrap(); assert_eq!(p.n, 4); assert_relative_eq!(p.sx, 13.); assert_relative_eq!(p.sx2, 44.75); assert_relative_eq!(p.sx3, -86.625); assert_relative_eq!(p.sx4, 966.8281249999964); let p = p .combine(StatsSummary1D::new_from_vec(vec![0.5, 11.0, 6.123]).unwrap()) .unwrap(); assert_eq!(p.n, 7); assert_relative_eq!(p.sx, 30.623); assert_relative_eq!(p.sx2, 111.77425342857143); assert_relative_eq!(p.sx3, -5.324891254897949); assert_relative_eq!(p.sx4, 3864.054085451184); let p = p .remove_combined(StatsSummary1D::new_from_vec(vec![5.0, 11.0, 3.0]).unwrap()) .unwrap(); assert_eq!(p.n, 4); assert_relative_eq!(p.sx, 11.623); assert_relative_eq!(p.sx2, 56.96759675000001); assert_relative_eq!(p.sx3, -30.055041237374915); assert_relative_eq!(p.sx4, 1000.8186787745212); } #[test] fn test_against_known_vals_tf() { let p = StatsSummary1D::new_from_vec(vec![tf(7.0), tf(18.0), tf(-2.0), tf(5.0), tf(3.0)]) .unwrap(); assert_eq!(p.n, 5); assert_relative_eq!(Into::::into(p.sx), 31.); assert_relative_eq!(Into::::into(p.sx2), 218.8); 
assert_relative_eq!(Into::::into(p.sx3), 1057.68); assert_relative_eq!(Into::::into(p.sx4), 24016.336); let p = p.remove(tf(18.0)).unwrap(); assert_eq!(p.n, 4); assert_relative_eq!(Into::::into(p.sx), 13.); // value is slightly off assert_relative_eq!(Into::::into(p.sx2), 44.75, epsilon = 0.000000000001); assert_relative_eq!(Into::::into(p.sx3), -86.625, epsilon = 0.000000000001); assert_relative_eq!( Into::::into(p.sx4), 966.8281249999964, epsilon = 0.000000000001 ); let p = p .combine(StatsSummary1D::new_from_vec(vec![tf(0.5), tf(11.0), tf(6.123)]).unwrap()) .unwrap(); assert_eq!(p.n, 7); assert_relative_eq!(Into::::into(p.sx), 30.623); assert_relative_eq!(Into::::into(p.sx2), 111.77425342857143); // slight difference in values here – not sure if twofloat or f64 is more accurate assert_relative_eq!( Into::::into(p.sx3), -5.324891254897949, epsilon = 0.0000000001 ); assert_relative_eq!( Into::::into(p.sx4), 3864.054085451184, epsilon = 0.0000000001 ); let p = p .remove_combined( StatsSummary1D::new_from_vec(vec![tf(5.0), tf(11.0), tf(3.0)]).unwrap(), ) .unwrap(); assert_eq!(p.n, 4); assert_relative_eq!(Into::::into(p.sx), 11.623); // f64 gets this slightly over, TF gets this slightly under assert_relative_eq!( Into::::into(p.sx2), 56.96759675000001, epsilon = 0.000000000001 ); // slight difference in values here – not sure if twofloat or f64 is more accurate assert_relative_eq!( Into::::into(p.sx3), -30.055041237374915, epsilon = 0.0000000001 ); assert_relative_eq!( Into::::into(p.sx4), 1000.8186787745212, epsilon = 0.0000000001 ); } #[test] fn test_combine() { let p = StatsSummary1D::new_from_vec(vec![1.0, 2.0, 3.0, 4.0]).unwrap(); let q = StatsSummary1D::new_from_vec(vec![1.0, 2.0]).unwrap(); let r = StatsSummary1D::new_from_vec(vec![3.0, 4.0]).unwrap(); assert_close_enough(&q.combine(r).unwrap(), &p); let p = StatsSummary1D::new_from_vec(vec![tf(1.0), tf(2.0), tf(3.0), tf(4.0)]).unwrap(); let q = StatsSummary1D::new_from_vec(vec![tf(1.0), tf(2.0)]).unwrap(); 
let r = StatsSummary1D::new_from_vec(vec![tf(3.0), tf(4.0)]).unwrap();
        assert_close_enough_tf(&q.combine(r).unwrap(), &p);
    }
}

================================================
FILE: crates/stats-agg/src/stats2d/stats2d_flat_serialize.rs
================================================
use super::*;

// expanded from FlatSerializable derive macro and made to work right with generic arg
// NOTE(review): the extraction stripped every `<...>` generic-argument span from this
// file; the generics below are reconstructed from the surviving tokens. The layout is
// one `u64` count followed by nine `T` sums/moments — TODO confirm against the repo.
#[allow(warnings, clippy::all)]
unsafe impl<'a, T: flat_serialize::FlatSerializable<'a> + 'a> flat_serialize::FlatSerializable<'a>
    for StatsSummary2D<T>
{
    // Strictest alignment required by any field (u64 or T).
    const REQUIRED_ALIGNMENT: usize = {
        let a = <u64 as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT;
        let b = <T as flat_serialize::FlatSerializable>::REQUIRED_ALIGNMENT;
        if a > b {
            a
        } else {
            b
        }
    };

    // Loosest alignment this struct can promise to whatever follows it: the minimum
    // provided alignment over the fields, clamped by the divisibility of MIN_LEN.
    const MAX_PROVIDED_ALIGNMENT: Option<usize> = {
        let a = <u64 as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT;
        let b = <T as flat_serialize::FlatSerializable>::MAX_PROVIDED_ALIGNMENT;
        // fields providing `None` are ignored, matching the derive expansion
        let min_align = match (a, b) {
            (None, x) => x,
            (x, None) => x,
            (Some(a), Some(b)) => Some(if a < b { a } else { b }),
        };
        match min_align {
            None => None,
            Some(min_align) => {
                let min_size = Self::MIN_LEN;
                if min_size % 8 == 0 && min_align >= 8 {
                    Some(8)
                } else if min_size % 4 == 0 && min_align >= 4 {
                    Some(4)
                } else if min_size % 2 == 0 && min_align >= 2 {
                    Some(2)
                } else {
                    Some(1)
                }
            }
        }
    };

    // One u64 field plus nine T fields, all fixed-size.
    const MIN_LEN: usize = <u64 as flat_serialize::FlatSerializable>::MIN_LEN
        + 9 * <T as flat_serialize::FlatSerializable>::MIN_LEN;

    const TRIVIAL_COPY: bool = true;
    type SLICE = flat_serialize::Slice<'a, StatsSummary2D<T>>;
    type OWNED = Self;

    #[allow(unused_assignments, unused_variables)]
    #[inline(always)]
    unsafe fn try_ref(mut input: &[u8]) -> Result<(Self, &[u8]), flat_serialize::WrapErr> {
        if input.len() < Self::MIN_LEN {
            return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN));
        }
        // Read one field, advancing `input`; propagate InvalidTag at its offset and
        // map any other failure to NotEnoughBytes for the whole struct (MIN_LEN is
        // exactly the sum of the per-field MIN_LENs the expansion reported).
        macro_rules! read_field {
            ($ty:ty) => {
                match <$ty as flat_serialize::FlatSerializable>::try_ref(input) {
                    Ok((field, rem)) => {
                        input = rem;
                        field
                    }
                    Err(flat_serialize::WrapErr::InvalidTag(offset)) => {
                        return Err(flat_serialize::WrapErr::InvalidTag(offset))
                    }
                    Err(..) => {
                        return Err(flat_serialize::WrapErr::NotEnoughBytes(Self::MIN_LEN))
                    }
                }
            };
        }
        let n = read_field!(u64);
        let sx = read_field!(T);
        let sx2 = read_field!(T);
        let sx3 = read_field!(T);
        let sx4 = read_field!(T);
        let sy = read_field!(T);
        let sy2 = read_field!(T);
        let sy3 = read_field!(T);
        let sy4 = read_field!(T);
        let sxy = read_field!(T);
        let _ref = StatsSummary2D {
            n,
            sx,
            sx2,
            sx3,
            sx4,
            sy,
            sy2,
            sy3,
            sy4,
            sxy,
        };
        Ok((_ref, input))
    }

    #[allow(unused_assignments, unused_variables)]
    #[inline(always)]
    unsafe fn fill_slice<'out>(
        &self,
        input: &'out mut [std::mem::MaybeUninit<u8>],
    ) -> &'out mut [std::mem::MaybeUninit<u8>] {
        let total_len = self.num_bytes();
        let (mut input, rem) = input.split_at_mut(total_len);
        input = self.n.fill_slice(input);
        input = self.sx.fill_slice(input);
        input = self.sx2.fill_slice(input);
        input = self.sx3.fill_slice(input);
        input = self.sx4.fill_slice(input);
        input = self.sy.fill_slice(input);
        input = self.sy2.fill_slice(input);
        input = self.sy3.fill_slice(input);
        input = self.sy4.fill_slice(input);
        input = self.sxy.fill_slice(input);
        // every reserved byte must have been written
        debug_assert_eq!(input.len(), 0);
        rem
    }

    #[allow(unused_assignments, unused_variables)]
    #[inline(always)]
    fn num_bytes(&self) -> usize {
        <u64 as flat_serialize::FlatSerializable>::num_bytes(&self.n)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sx)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sx2)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sx3)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sx4)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sy)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sy2)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sy3)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sy4)
            + <T as flat_serialize::FlatSerializable>::num_bytes(&self.sxy)
    }

    #[inline(always)]
    fn make_owned(&mut self) {}

    #[inline(always)]
    fn into_owned(self) -> Self::OWNED {
        self
    }
}

================================================
FILE: crates/stats-agg/src/stats2d.rs
================================================
// 2D stats are based on the
Youngs-Cramer implementation in PG here: // https://github.com/postgres/postgres/blob/472e518a44eacd9caac7d618f1b6451672ca4481/src/backend/utils/adt/float.c#L3260 use crate::{m3, m4, FloatLike, StatsError, XYPair, INV_FLOATING_ERROR_THRESHOLD}; use serde::{Deserialize, Serialize}; use twofloat::TwoFloat; mod stats2d_flat_serialize; #[derive(Debug, PartialEq, Eq, Copy, Clone, Serialize, Deserialize)] #[repr(C)] pub struct StatsSummary2D { pub n: u64, // count pub sx: T, // sum(x) pub sx2: T, // sum((x-sx/n)^2) (sum of squares) pub sx3: T, // sum((x-sx/n)^3) pub sx4: T, // sum((x-sx/n)^4) pub sy: T, // sum(y) pub sy2: T, // sum((y-sy/n)^2) (sum of squares) pub sy3: T, // sum((y-sy/n)^3) pub sy4: T, // sum((y-sy/n)^4) pub sxy: T, // sum((x-sx/n)*(y-sy/n)) (sum of products) } impl From> for StatsSummary2D { fn from(input_summary: StatsSummary2D) -> Self { StatsSummary2D { n: input_summary.n, sx: input_summary.sx.into(), sx2: input_summary.sx2.into(), sx3: input_summary.sx3.into(), sx4: input_summary.sx4.into(), sy: input_summary.sy.into(), sy2: input_summary.sy2.into(), sy3: input_summary.sy3.into(), sy4: input_summary.sy4.into(), sxy: input_summary.sxy.into(), } } } impl Default for StatsSummary2D { fn default() -> Self { Self::new() } } impl StatsSummary2D { pub fn new() -> Self { StatsSummary2D { n: 0, sx: T::zero(), sx2: T::zero(), sx3: T::zero(), sx4: T::zero(), sy: T::zero(), sy2: T::zero(), sy3: T::zero(), sy4: T::zero(), sxy: T::zero(), } } fn n64(&self) -> T { T::from_u64(self.n) } /// accumulate an XYPair into a StatsSummary2D /// ``` /// use stats_agg::*; /// use stats_agg::stats2d::*; /// let mut p = StatsSummary2D::new(); /// p.accum(XYPair{x:1.0, y:1.0,}).unwrap(); /// p.accum(XYPair{x:2.0, y:2.0,}).unwrap(); /// //we can add in infinite values and it will handle it properly. 
/// p.accum(XYPair{x:f64::INFINITY, y:1.0}).unwrap(); /// assert_eq!(p.sum().unwrap().x, f64::INFINITY); /// assert!(p.sum_squares().unwrap().x.is_nan()); // this is NaN because it involves multiplication of two infinite values /// /// assert_eq!(p.accum(XYPair{y:f64::MAX, x:1.0,}), Err(StatsError::DoubleOverflow)); // we do error if we actually overflow however /// ///``` pub fn accum(&mut self, p: XYPair) -> Result<(), StatsError> { let old = *self; self.n += 1; self.sx += p.x; self.sy += p.y; if old.n > 0 { let tmpx = p.x * self.n64() - self.sx; let tmpy = p.y * self.n64() - self.sy; let scale = (self.n64() * old.n64()).recip(); self.sx2 += tmpx * tmpx * scale; self.sx3 = m3::accum(old.n64(), old.sx, old.sx2, old.sx3, p.x); self.sx4 = m4::accum(old.n64(), old.sx, old.sx2, old.sx3, old.sx4, p.x); self.sy2 += tmpy * tmpy * scale; self.sy3 = m3::accum(old.n64(), old.sy, old.sy2, old.sy3, p.y); self.sy4 = m4::accum(old.n64(), old.sy, old.sy2, old.sy3, old.sy4, p.y); self.sxy += tmpx * tmpy * scale; if self.has_infinite() { if self.check_overflow(&old, p) { return Err(StatsError::DoubleOverflow); } // sxx, syy, and sxy should be set to NaN if any of their inputs are // infinite, so if they ended up as infinite and there wasn't an overflow, // we need to set them to NaN instead as this implies that there was an // infinite input (because they necessarily involve multiplications of // infinites, which are NaNs) if self.sx2.is_infinite() { self.sx2 = T::nan(); } if self.sx3.is_infinite() { self.sx3 = T::nan(); } if self.sx4.is_infinite() { self.sx4 = T::nan(); } if self.sy2.is_infinite() { self.sy2 = T::nan(); } if self.sy3.is_infinite() { self.sy3 = T::nan(); } if self.sy4.is_infinite() { self.sy4 = T::nan(); } if self.sxy.is_infinite() { self.sxy = T::nan(); } } } else { // first input, leave sxx/syy/sxy alone unless we have infinite inputs if !p.x.is_finite() { self.sx2 = T::nan(); self.sx3 = T::nan(); self.sx4 = T::nan(); self.sxy = T::nan(); } if !p.y.is_finite() { 
self.sy2 = T::nan(); self.sy3 = T::nan(); self.sy4 = T::nan(); self.sxy = T::nan(); } } Result::Ok(()) } fn has_infinite(&self) -> bool { self.sx.is_infinite() || self.sx2.is_infinite() || self.sx3.is_infinite() || self.sx4.is_infinite() || self.sy.is_infinite() || self.sy2.is_infinite() || self.sy3.is_infinite() || self.sy4.is_infinite() || self.sxy.is_infinite() } fn check_overflow(&self, old: &StatsSummary2D, p: XYPair) -> bool { //Only report overflow if we have finite inputs that lead to infinite results. ((self.sx.is_infinite() || self.sx2.is_infinite() || self.sx3.is_infinite() || self.sx4.is_infinite()) && old.sx.is_finite() && p.x.is_finite()) || ((self.sy.is_infinite() || self.sy2.is_infinite() || self.sy3.is_infinite() || self.sy4.is_infinite()) && old.sy.is_finite() && p.y.is_finite()) || (self.sxy.is_infinite() && old.sx.is_finite() && p.x.is_finite() && old.sy.is_finite() && p.y.is_finite()) } // inverse transition function (inverse of accum) for windowed aggregates, return None if we want to re-calculate from scratch // we won't modify in place here because of that return bit, it might be that we want to modify accum to also // copy just for symmetry. // Assumption: no need for Result/error possibility because we can't overflow, as we are doing an inverse operation of something that already happened, so if it worked forward, it should work in reverse? // We're extending the Youngs Cramer algorithm here with the algebraic transformation to figure out the reverse calculations. // This goes beyond what the PG code does, and is our extension for performance in windowed calculations. // There is a case where the numerical error can get very large that we will try to avoid: if we have an outlier value that is much larger than the surrounding values // we can get something like: v1 + v2 + v3 + ... vn = outlier + v1 + v2 + v3 + ... + vn - outlier when the outlier is removed from the window. 
This will cause significant error in the // resulting calculation of v1 + ... + vn, more than we're comfortable with, so we'll return None in that case which will force recalculation from scratch of v1 + ... + vn. // Algebra for removal: // n = n_old + 1 -> n_old = n - 1 // Sx = Sx_old + x -> Sx_old = Sx - x // sum((x - Sx/n)^2) = Sxx = Sxx_old + 1/(n * n_old) * (nx - Sx)^2 -> Sxx_old = Sxx - 1/(n * n_old) * (nx - Sx)^2 // Sy / Syy analogous // sum((x - Sx/n)(y - Sy/n)) = Sxy = Sxy_old + 1/(n * n_old) * (nx - Sx) * (ny - Sy) -> Sxy_old = Sxy - 1/(n * n_old) * (nx - Sx) * (ny - Sy) pub fn remove(&self, p: XYPair) -> Option { // if we are trying to remove a nan/infinite input, it's time to recalculate. if !p.x.is_finite() || !p.y.is_finite() { return None; } // if we are removing a value that is very large compared to the sum of the values that we're removing it from, // we should probably recalculate to avoid accumulating error. We might want a different test for this, if there // is a way to calculate the error directly, that might be best... let thresh = >::from(INV_FLOATING_ERROR_THRESHOLD); if p.x / self.sx > thresh || p.y / self.sy > thresh { return None; } // we can't have an initial value of n = 0 if we're removing something... if self.n == 0 { panic!(); //perhaps we should do error handling here, but I think this is reasonable as we are assuming that the removal is of an already-added item in the rest of this } // if we're removing the last point we should just return a completely empty value to eliminate any errors, it can only be completely empty at that point. if self.n == 1 { return Some(StatsSummary2D::new()); } let mut new = StatsSummary2D { n: self.n - 1, sx: self.sx - p.x, sy: self.sy - p.y, sx2: T::zero(), // initialize these for now. 
sx3: T::zero(), sx4: T::zero(), sy2: T::zero(), sy3: T::zero(), sy4: T::zero(), sxy: T::zero(), }; let tmpx = p.x * self.n64() - self.sx; let tmpy = p.y * self.n64() - self.sy; let scale = (self.n64() * new.n64()).recip(); new.sx2 = self.sx2 - tmpx * tmpx * scale; new.sx3 = m3::remove(new.n64(), new.sx, new.sx2, self.sx3, p.x); new.sx4 = m4::remove(new.n64(), new.sx, new.sx2, new.sx3, self.sx4, p.x); new.sy2 = self.sy2 - tmpy * tmpy * scale; new.sy3 = m3::remove(new.n64(), new.sy, new.sy2, self.sy3, p.y); new.sy4 = m4::remove(new.n64(), new.sy, new.sy2, new.sy3, self.sy4, p.y); new.sxy = self.sxy - tmpx * tmpy * scale; Some(new) } ///create a StatsSummary2D from a vector of XYPairs /// ``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let mut p = StatsSummary2D::new(); /// p.accum(XYPair{x:1.0, y:1.0,}).unwrap(); /// p.accum(XYPair{x:2.0, y:2.0,}).unwrap(); /// p.accum(XYPair{x:3.0, y:3.0,}).unwrap(); /// let q = StatsSummary2D::new_from_vec(vec![XYPair{x:1.0, y:1.0,}, XYPair{x:2.0, y:2.0,}, XYPair{x:3.0, y:3.0,}]).unwrap(); /// assert_eq!(p, q); ///``` pub fn new_from_vec(v: Vec>) -> Result { let mut r = StatsSummary2D::new(); for p in v { r.accum(p)?; } Result::Ok(r) } /// combine two StatsSummary2Ds /// ``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{x:1.0, y:1.0,}, XYPair{x:2.0, y:2.0,}, XYPair{x:3.0, y:3.0,}, XYPair{x:4.0, y:4.0,}]).unwrap(); /// let q = StatsSummary2D::new_from_vec(vec![XYPair{x:1.0, y:1.0,}, XYPair{x:2.0, y:2.0,},]).unwrap(); /// let r = StatsSummary2D::new_from_vec(vec![XYPair{x:3.0, y:3.0,}, XYPair{x:4.0, y:4.0,},]).unwrap(); /// let r = r.combine(q).unwrap(); /// assert_eq!(r, p); /// ``` // we combine two StatsSummary2Ds via a generalization of the Youngs-Cramer algorithm, we follow what Postgres does here // n = n1 + n2 // sx = sx1 + sx2 // sxx = sxx1 + sxx2 + n1 * n2 * (sx1/n1 - sx2/n2)^2 / n // sy / syy analogous // sxy = sxy1 
+ sxy2 + n1 * n2 * (sx1/n1 - sx2/n2) * (sy1/n1 - sy2/n2) / n pub fn combine(&self, other: StatsSummary2D) -> Result { // TODO: think about whether we want to just modify &self in place here for perf // reasons. This is also a set of weird questions around the Rust compiler, so // easier to just add the copy trait here, may need to adjust or may make things // harder if we do generics. if self.n == 0 && other.n == 0 { return Ok(StatsSummary2D::new()); } else if self.n == 0 { // handle the trivial n = 0 cases here, and don't worry about divide by zero errors later. return Ok(other); } else if other.n == 0 { return Ok(*self); } let tmpx = self.sx / self.n64() - other.sx / other.n64(); let tmpy = self.sy / self.n64() - other.sy / other.n64(); let n = self.n + other.n; let r = StatsSummary2D { n, sx: self.sx + other.sx, sx2: self.sx2 + other.sx2 + self.n64() * other.n64() * tmpx * tmpx / T::from_u64(n), sx3: m3::combine( self.n64(), other.n64(), self.sx, other.sx, self.sx2, other.sx2, self.sx3, other.sx3, ), sx4: m4::combine( self.n64(), other.n64(), self.sx, other.sx, self.sx2, other.sx2, self.sx3, other.sx3, self.sx4, other.sx4, ), sy: self.sy + other.sy, sy2: self.sy2 + other.sy2 + self.n64() * other.n64() * tmpy * tmpy / T::from_u64(n), sy3: m3::combine( self.n64(), other.n64(), self.sy, other.sy, self.sy2, other.sy2, self.sy3, other.sy3, ), sy4: m4::combine( self.n64(), other.n64(), self.sy, other.sy, self.sy2, other.sy2, self.sy3, other.sy3, self.sy4, other.sy4, ), sxy: self.sxy + other.sxy + self.n64() * other.n64() * tmpx * tmpy / T::from_u64(n), }; if r.has_infinite() && !self.has_infinite() && !other.has_infinite() { return Err(StatsError::DoubleOverflow); } Ok(r) } // This is the inverse combine function for use in the window function context when we want to reverse the operation of the normal combine function // for re-aggregation over a window, this is what will get called in tumbling window averages for instance. 
// As with any window function, returning None will cause a re-calculation, so we do that in several cases where either we're dealing with infinites or we have some potential problems with outlying sums // so here, self is the previously combined StatsSummary, and we're removing the input and returning the part that would have been there before. pub fn remove_combined(&self, remove: StatsSummary2D) -> Option { let combined = &self; // just to lessen confusion with naming // handle the trivial n = 0 and equal n cases here, and don't worry about divide by zero errors later. if combined.n == remove.n { return Some(StatsSummary2D::new()); } else if remove.n == 0 { return Some(*self); } else if combined.n < remove.n { panic!(); // given that we're always removing things that we've previously added, we shouldn't be able to get a case where we're removing an n that's larger. } // if the sum we're removing is very large compared to the overall value we need to recalculate, see note on the remove function let thresh = >::from(INV_FLOATING_ERROR_THRESHOLD); if remove.sx / combined.sx > thresh || remove.sy / combined.sy > thresh { return None; } let mut part = StatsSummary2D { n: combined.n - remove.n, sx: combined.sx - remove.sx, sy: combined.sy - remove.sy, sx2: T::zero(), //just initialize these, for now. 
sx3: T::zero(), sx4: T::zero(), sy2: T::zero(), sy3: T::zero(), sy4: T::zero(), sxy: T::zero(), }; let tmpx = part.sx / part.n64() - remove.sx / remove.n64(); //gets squared so order doesn't matter let tmpy = part.sy / part.n64() - remove.sy / remove.n64(); part.sx2 = combined.sx2 - remove.sx2 - part.n64() * remove.n64() * tmpx * tmpx / combined.n64(); part.sx3 = m3::remove_combined( part.n64(), remove.n64(), part.sx, remove.sx, part.sx2, remove.sx2, self.sx3, remove.sx3, ); part.sx4 = m4::remove_combined( part.n64(), remove.n64(), part.sx, remove.sx, part.sx2, remove.sx2, part.sx3, remove.sx3, self.sx4, remove.sx4, ); part.sy2 = combined.sy2 - remove.sy2 - part.n64() * remove.n64() * tmpy * tmpy / combined.n64(); part.sy3 = m3::remove_combined( part.n64(), remove.n64(), part.sy, remove.sy, part.sy2, remove.sy2, self.sy3, remove.sy3, ); part.sy4 = m4::remove_combined( part.n64(), remove.n64(), part.sy, remove.sy, part.sy2, remove.sy2, part.sy3, remove.sy3, self.sy4, remove.sy4, ); part.sxy = combined.sxy - remove.sxy - part.n64() * remove.n64() * tmpx * tmpy / combined.n64(); Some(part) } /// offsets all values accumulated in a StatsSummary2D by a given amount in X & Y. This /// only works if all values are offset by that amount. This is used for allowing /// relative calculations in a local region and then allowing them to be combined with /// other regions where all points are offset by the same amount. The main use case /// for now is in the counter case where, when partials are combined you can get a new /// offset for all points in the counter. // Note that when offsetting, the offset of the previous partial be multiplied by N and added to the Sy value. All the other values are // unaffected because they rely on the expression (Y-Sy/N), (and analogous for the X values) which is basically each value subtracted from the // average of all values and if all values are shifted by a constant, then the average shifts by the same constant so it cancels out: // i.e. 
If a constant C is added to each Y, then (Y-Sy/N) reduces back to itself as follows: //(Y + C) - (Sy + NC)/N // Y + C - Sy/N - NC/N // Y + C - Sy/N - C // Y - Sy/N pub fn offset(&mut self, offset: XYPair) -> Result<(), StatsError> { self.sx += self.n64() * offset.x; self.sy += self.n64() * offset.y; if self.has_infinite() && offset.x.is_finite() && offset.y.is_finite() { return Err(StatsError::DoubleOverflow); } Ok(()) } //TODO: Add tests for offsets ///returns the sum of squares of both the independent (x) and dependent (y) variables ///as an XYPair, where the sum of squares is defined as: sum(x^2) - sum(x)^2 / n) ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let ssx = (1.0_f64.powi(2) + 2.0_f64.powi(2) + 3.0_f64.powi(2)) - (1.0+2.0+3.0_f64).powi(2)/3.0; /// let ssy = (2.0_f64.powi(2) + 4.0_f64.powi(2) + 6.0_f64.powi(2)) - (2.0+4.0+6.0_f64).powi(2)/3.0; /// let ssp = p.sum_squares().unwrap(); /// assert_eq!(ssp.x, ssx); /// assert_eq!(ssp.y, ssy); /// //empty StatsSummary2Ds return None /// assert!(StatsSummary2D::::new().sum_squares().is_none()); /// ``` pub fn sum_squares(&self) -> Option> { if self.n == 0 { return None; } Some(XYPair { x: self.sx2, y: self.sy2, }) } ///returns the "sum of products" of the dependent * independent variables sum(x * y) - sum(x) * sum(y) / n ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let s = (2.0 * 1.0 + 4.0 * 2.0 + 6.0 * 3.0) - (2.0 + 4.0 + 6.0)*(1.0 + 2.0 + 3.0)/3.0; /// assert_eq!(p.sumxy().unwrap(), s); /// //empty StatsSummary2Ds return None /// assert!(StatsSummary2D::::new().sumxy().is_none()); /// ``` pub fn sumxy(&self) -> Option { if self.n == 0 { return None; } Some(self.sxy) } ///returns the 
averages of the x and y variables ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let avgx = (1.0 + 2.0 + 3.0)/3.0; /// let avgy = (2.0 + 4.0 + 6.0)/3.0; /// let avgp = p.avg().unwrap(); /// assert_eq!(avgp.x, avgx); /// assert_eq!(avgp.y, avgy); /// //empty StatsSummary2Ds return None /// assert!(StatsSummary2D::::new().avg().is_none()); /// ``` pub fn avg(&self) -> Option> { if self.n == 0 { return None; } Some(XYPair { x: self.sx / self.n64(), y: self.sy / self.n64(), }) } ///returns the count of inputs as an i64 ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let s = 3; /// assert_eq!(p.count(), s); /// //empty StatsSummary2Ds return 0 count /// assert_eq!(StatsSummary2D::::new().count(), 0); /// ``` pub fn count(&self) -> i64 { self.n as i64 } ///returns the sums of x and y as an XYPair ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let sumx = (1.0 + 2.0 + 3.0); /// let sumy = (2.0 + 4.0 + 6.0); /// let sump = p.sum().unwrap(); /// assert_eq!(sump.x, sumx); /// assert_eq!(sump.y, sumy); /// //empty StatsSummary2Ds return None /// assert!(StatsSummary2D::::new().sum().is_none()); /// ``` pub fn sum(&self) -> Option> { if self.n == 0 { return None; } Some(XYPair { x: self.sx, y: self.sy, }) } pub fn var_pop(&self) -> Option> { if self.n == 0 { return None; } Some(XYPair { x: self.sx2 / self.n64(), y: self.sy2 / self.n64(), }) } pub fn var_samp(&self) -> Option> { if self.n <= 1 { return None; } Some(XYPair { x: self.sx2 / (self.n64() - T::one()), y: self.sy2 / (self.n64() - T::one()), 
}) } ///returns the population standard deviation of both the independent and dependent variables as an XYPair pub fn stddev_pop(&self) -> Option> { let var = self.var_pop()?; Some(XYPair { x: var.x.sqrt(), y: var.y.sqrt(), }) } ///returns the sample standard deviation of both the independent and dependent variables as an XYPair pub fn stddev_samp(&self) -> Option> { let var = self.var_samp()?; Some(XYPair { x: var.x.sqrt(), y: var.y.sqrt(), }) } pub fn skewness_pop(&self) -> Option> { let stddev = self.stddev_pop()?; Some(XYPair { x: self.sx3 / self.n64() / stddev.x.powi(3), y: self.sy3 / self.n64() / stddev.y.powi(3), }) } pub fn skewness_samp(&self) -> Option> { let stddev = self.stddev_samp()?; Some(XYPair { x: self.sx3 / (self.n64() - T::one()) / stddev.x.powi(3), y: self.sy3 / (self.n64() - T::one()) / stddev.y.powi(3), }) } pub fn kurtosis_pop(&self) -> Option> { let stddev = self.stddev_pop()?; Some(XYPair { x: self.sx4 / self.n64() / stddev.x.powi(4), y: self.sy4 / self.n64() / stddev.y.powi(4), }) } pub fn kurtosis_samp(&self) -> Option> { let stddev = self.stddev_samp()?; Some(XYPair { x: self.sx4 / (self.n64() - T::one()) / stddev.x.powi(4), y: self.sy4 / (self.n64() - T::one()) / stddev.y.powi(4), }) } /// returns the correlation coefficient, which is the covariance / (stddev(x) * stddev(y)) /// Note that it makes no difference whether we choose the sample or /// population covariance and stddev, because we end up with a canceling n or n-1 term. 
This /// also allows us to reduce our calculation to the sumxy / sqrt(sum_squares(x)*sum_squares(y)) pub fn corr(&self) -> Option { // empty StatsSummary2Ds, horizontal or vertical lines should return None if self.n == 0 || self.sx2 == T::zero() || self.sy2 == T::zero() { return None; } Some(self.sxy / (self.sx2 * self.sy2).sqrt()) } /// returns the slope of the least squares fit line pub fn slope(&self) -> Option { // the case of a single point will usually be triggered by the the second branch of this (which is also a test for a vertical line) //however, in cases where we had an infinite input, we will end up with NaN which is the expected behavior. if self.n == 0 || self.sx2 == T::zero() { return None; } Some(self.sxy / self.sx2) } /// returns the intercept of the least squares fit line pub fn intercept(&self) -> Option { if self.n == 0 || self.sx2 == T::zero() { return None; } Some((self.sy - self.sx * self.sxy / self.sx2) / self.n64()) } /// returns the x intercept of the least squares fit line // y = mx + b (y = 0) // -b = mx // x = -b / m pub fn x_intercept(&self) -> Option { // vertical line does have an x intercept if self.n > 1 && self.sx2 == T::zero() { return Some(self.sx / self.n64()); } // horizontal lines have no x intercepts if self.sy2 == T::zero() { return None; } Some(-self.intercept()? / self.slope()?) 
} /// returns the square of the correlation coefficient (aka the coefficient of determination) pub fn determination_coeff(&self) -> Option { if self.n == 0 || self.sx2 == T::zero() { return None; } //horizontal lines return 1.0 error if self.sy2 == T::zero() { return Some(T::one()); } Some(self.sxy * self.sxy / (self.sx2 * self.sy2)) } ///returns the sample covariance: (sumxy()/n-1) ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let s = (2.0 * 1.0 + 4.0 * 2.0 + 6.0 * 3.0) - (2.0 + 4.0 + 6.0)*(1.0 + 2.0 + 3.0)/3.0; /// let s = s/2.0; /// assert_eq!(p.covar_samp().unwrap(), s); /// //empty StatsSummary2Ds return None /// assert!(StatsSummary2D::::new().covar_samp().is_none()); /// ``` pub fn covar_samp(&self) -> Option { if self.n <= 1 { return None; } Some(self.sxy / (self.n64() - T::one())) } ///returns the population covariance: (sumxy()/n) ///``` /// use stats_agg::stats2d::StatsSummary2D; /// use stats_agg::XYPair; /// let p = StatsSummary2D::new_from_vec(vec![XYPair{y:2.0, x:1.0,}, XYPair{y:4.0, x:2.0,}, XYPair{y:6.0, x:3.0,}]).unwrap(); /// let s = (2.0 * 1.0 + 4.0 * 2.0 + 6.0 * 3.0) - (2.0 + 4.0 + 6.0)*(1.0 + 2.0 + 3.0)/3.0; /// let s = s/3.0; /// assert_eq!(p.covar_pop().unwrap(), s); /// //empty StatsSummary2Ds return None /// assert!(StatsSummary2D::::new().covar_pop().is_none()); /// ``` pub fn covar_pop(&self) -> Option { if self.n == 0 { return None; } Some(self.sxy / self.n64()) } } #[cfg(test)] mod tests { use super::*; fn tf(f: f64) -> TwoFloat { TwoFloat::new_add(f, 0.0) } #[test] fn test_linear() { let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: 2.0, x: 1.0 }, XYPair { y: 4.0, x: 2.0 }, XYPair { y: 6.0, x: 3.0 }, ]) .unwrap(); assert_eq!(p.slope().unwrap(), 2.0); assert_eq!(p.intercept().unwrap(), 0.0); assert_eq!(p.x_intercept().unwrap(), 0.0); let p = 
StatsSummary2D::new_from_vec(vec![ XYPair { y: 2.0, x: 2.0 }, XYPair { y: 4.0, x: 3.0 }, XYPair { y: 6.0, x: 4.0 }, ]) .unwrap(); assert_eq!(p.slope().unwrap(), 2.0); assert_eq!(p.intercept().unwrap(), -2.0); assert_eq!(p.x_intercept().unwrap(), 1.0); // empty let p: StatsSummary2D = StatsSummary2D::new(); assert_eq!(p.slope(), None); assert_eq!(p.intercept(), None); assert_eq!(p.x_intercept(), None); // singleton let p = StatsSummary2D::new_from_vec(vec![XYPair { y: 2.0, x: 2.0 }]).unwrap(); assert_eq!(p.slope(), None); assert_eq!(p.intercept(), None); assert_eq!(p.x_intercept(), None); //vertical let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: 2.0, x: 2.0 }, XYPair { y: 4.0, x: 2.0 }, ]) .unwrap(); assert_eq!(p.slope(), None); assert_eq!(p.intercept(), None); assert_eq!(p.x_intercept().unwrap(), 2.0); //horizontal let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: 2.0, x: 2.0 }, XYPair { y: 2.0, x: 4.0 }, ]) .unwrap(); assert_eq!(p.slope().unwrap(), 0.0); assert_eq!(p.intercept().unwrap(), 2.0); assert_eq!(p.x_intercept(), None); } #[test] fn test_linear_tf() { let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: tf(2.0), x: tf(1.0), }, XYPair { y: tf(4.0), x: tf(2.0), }, XYPair { y: tf(6.0), x: tf(3.0), }, ]) .unwrap(); assert_eq!(p.slope().unwrap(), tf(2.0)); assert_eq!(p.intercept().unwrap(), tf(0.0)); assert_eq!(p.x_intercept().unwrap(), tf(0.0)); let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: tf(2.0), x: tf(2.0), }, XYPair { y: tf(4.0), x: tf(3.0), }, XYPair { y: tf(6.0), x: tf(4.0), }, ]) .unwrap(); assert_eq!(p.slope().unwrap(), tf(2.0)); assert_eq!(p.intercept().unwrap().hi(), -2.0); assert!(p.intercept().unwrap().lo().abs() < f64::EPSILON); assert_eq!(p.x_intercept().unwrap().hi(), 1.0); assert!(p.x_intercept().unwrap().lo().abs() < f64::EPSILON); // empty let p: StatsSummary2D = StatsSummary2D::new(); assert_eq!(p.slope(), None); assert_eq!(p.intercept(), None); assert_eq!(p.x_intercept(), None); // singleton let p = 
StatsSummary2D::new_from_vec(vec![XYPair { y: tf(2.0), x: tf(2.0), }]) .unwrap(); assert_eq!(p.slope(), None); assert_eq!(p.intercept(), None); assert_eq!(p.x_intercept(), None); //vertical let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: tf(2.0), x: tf(2.0), }, XYPair { y: tf(4.0), x: tf(2.0), }, ]) .unwrap(); assert_eq!(p.slope(), None); assert_eq!(p.intercept(), None); assert_eq!(p.x_intercept().unwrap(), tf(2.0)); //horizontal let p = StatsSummary2D::new_from_vec(vec![ XYPair { y: tf(2.0), x: tf(2.0), }, XYPair { y: tf(2.0), x: tf(4.0), }, ]) .unwrap(); assert_eq!(p.slope().unwrap(), tf(0.0)); assert_eq!(p.intercept().unwrap(), tf(2.0)); assert_eq!(p.x_intercept(), None); } } ================================================ FILE: crates/t-digest/Cargo.toml ================================================ [package] name = "tdigest" version = "0.2.2" edition = "2021" # based on: https://github.com/MnO2/t-digest" [dependencies] flat_serialize = {path="../flat_serialize/flat_serialize"} flat_serialize_macro = {path="../flat_serialize/flat_serialize_macro"} ordered-float = {version = "1.0", features = ["serde"] } ron = "0.6.0" serde = { version = "1.0", features = ["derive"] } [dev-dependencies] quickcheck = "1" quickcheck_macros = "1" ================================================ FILE: crates/t-digest/src/lib.rs ================================================ // Based on https://github.com/MnO2/t-digest/blob/master/src/lib.rs // as of commit 66d7c19d32c1547daa628f1d9f12178a686ba022 //! T-Digest algorithm in rust //! //! ## Installation //! //! Add this to your `Cargo.toml`: //! //! ```toml //! [dependencies] //! tdigest = "0.2" //! ``` //! //! then you are good to go. If you are using Rust 2015 you have to ``extern crate tdigest`` to your crate root as well. //! //! ## Example //! //! ```rust //! use tdigest::TDigest; //! //! let t = TDigest::new_with_size(100); //! let values: Vec = (1..=1_000_000).map(f64::from).collect(); //! //! 
let t = t.merge_sorted(values); //! //! let ans = t.estimate_quantile(0.99); //! let expected: f64 = 990_000.0; //! //! let percentage: f64 = (expected - ans).abs() / expected; //! assert!(percentage < 0.01); //! ``` use ordered_float::OrderedFloat; use std::cmp::Ordering; #[cfg(test)] use std::collections::HashSet; use flat_serialize_macro::FlatSerializable; use serde::{Deserialize, Serialize}; #[cfg(test)] extern crate quickcheck; #[cfg(test)] #[macro_use(quickcheck)] extern crate quickcheck_macros; /// Centroid implementation to the cluster mentioned in the paper. #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, FlatSerializable)] #[repr(C)] pub struct Centroid { mean: OrderedFloat, weight: u64, } impl PartialOrd for Centroid { fn partial_cmp(&self, other: &Centroid) -> Option { Some(self.cmp(other)) } } impl Ord for Centroid { fn cmp(&self, other: &Centroid) -> Ordering { self.mean.cmp(&other.mean) } } impl Centroid { pub fn new(mean: f64, weight: u64) -> Self { Centroid { mean: OrderedFloat::from(mean), weight, } } #[inline] pub fn mean(&self) -> f64 { self.mean.into_inner() } #[inline] pub fn weight(&self) -> u64 { self.weight } pub fn add(&mut self, sum: f64, weight: u64) -> f64 { let weight_: u64 = self.weight; let mean_: f64 = self.mean.into_inner(); let new_sum: f64 = sum + weight_ as f64 * mean_; let new_weight: u64 = weight_ + weight; self.weight = new_weight; self.mean = OrderedFloat::from(new_sum / new_weight as f64); new_sum } } impl Default for Centroid { fn default() -> Self { Centroid { mean: OrderedFloat::from(0.0), weight: 1, } } } /// T-Digest to be operated on. 
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] pub struct TDigest { centroids: Vec, max_size: usize, sum: OrderedFloat, count: u64, max: OrderedFloat, min: OrderedFloat, } impl TDigest { pub fn new_with_size(max_size: usize) -> Self { TDigest { centroids: Vec::new(), max_size, sum: OrderedFloat::from(0.0), count: 0, max: OrderedFloat::from(f64::NAN), min: OrderedFloat::from(f64::NAN), } } pub fn new( centroids: Vec, sum: f64, count: u64, max: f64, min: f64, max_size: usize, ) -> Self { if centroids.len() <= max_size { TDigest { centroids, max_size, sum: OrderedFloat::from(sum), count, max: OrderedFloat::from(max), min: OrderedFloat::from(min), } } else { let sz = centroids.len(); let digests: Vec = vec![ TDigest::new_with_size(max_size), TDigest::new(centroids, sum, count, max, min, sz), ]; Self::merge_digests(digests) } } pub fn raw_centroids(&self) -> &[Centroid] { &self.centroids } #[inline] pub fn mean(&self) -> f64 { let sum_: f64 = self.sum.into_inner(); if self.count > 0 { sum_ / self.count as f64 } else { 0.0 } } #[inline] pub fn sum(&self) -> f64 { self.sum.into_inner() } #[inline] pub fn count(&self) -> u64 { self.count } #[inline] pub fn max(&self) -> f64 { self.max.into_inner() } #[inline] pub fn min(&self) -> f64 { self.min.into_inner() } #[inline] pub fn is_empty(&self) -> bool { self.centroids.is_empty() } #[inline] pub fn max_size(&self) -> usize { self.max_size } #[inline] pub fn num_buckets(&self) -> usize { self.centroids.len() } pub fn format_for_postgres(&self) -> String { /// Mimics the version-1 serialization format the extension uses. TODO don't! 
#[derive(Serialize)] struct Hack { version: u32, buckets: usize, max_buckets: usize, count: u64, sum: f64, min: f64, max: f64, centroids: Vec, } let max_buckets = self.max_size(); let centroids = self.raw_centroids(); ron::to_string(&Hack { version: 1, max_buckets, buckets: centroids.len(), count: self.count(), sum: self.sum(), min: self.min(), max: self.max(), centroids: centroids.to_vec(), }) .unwrap() } } impl Default for TDigest { fn default() -> Self { TDigest { centroids: Vec::new(), max_size: 100, sum: OrderedFloat::from(0.0), count: 0, max: OrderedFloat::from(f64::NAN), min: OrderedFloat::from(f64::NAN), } } } impl TDigest { fn k_to_q(k: f64, d: f64) -> f64 { let k_div_d = k / d; if k_div_d >= 0.5 { let base = 1.0 - k_div_d; 1.0 - 2.0 * base * base } else { 2.0 * k_div_d * k_div_d } } pub fn merge_unsorted(&self, unsorted_values: Vec) -> TDigest { let mut sorted_values: Vec> = unsorted_values .into_iter() .map(OrderedFloat::from) .collect(); sorted_values.sort(); let sorted_values = sorted_values.into_iter().map(|f| f.into_inner()).collect(); self.merge_sorted(sorted_values) } // Allow f64 overflow to create centroids with infinite mean, but make sure our min/max are updated fn update_bounds_on_overflow( value: OrderedFloat, lower_bound: &mut OrderedFloat, upper_bound: &mut OrderedFloat, ) { if value < *lower_bound { *lower_bound = value; } if value > *upper_bound { *upper_bound = value; } } pub fn merge_sorted(&self, sorted_values: Vec) -> TDigest { if sorted_values.is_empty() { return self.clone(); } let mut result = TDigest::new_with_size(self.max_size()); result.count = self.count() + (sorted_values.len() as u64); let maybe_min = OrderedFloat::from(*sorted_values.first().unwrap()); let maybe_max = OrderedFloat::from(*sorted_values.last().unwrap()); if self.count() > 0 { result.min = std::cmp::min(self.min, maybe_min); result.max = std::cmp::max(self.max, maybe_max); } else { result.min = maybe_min; result.max = maybe_max; } let mut compressed: Vec = 
Vec::with_capacity(self.max_size); let mut k_limit: f64 = 1.0; let mut q_limit_times_count: f64 = Self::k_to_q(k_limit, self.max_size as f64) * result.count as f64; k_limit += 1.0; let mut iter_centroids = self.centroids.iter().peekable(); let mut iter_sorted_values = sorted_values.iter().peekable(); let mut curr: Centroid = if let Some(c) = iter_centroids.peek() { let curr = **iter_sorted_values.peek().unwrap(); if c.mean() < curr { iter_centroids.next().unwrap().clone() } else { Centroid::new(*iter_sorted_values.next().unwrap(), 1) } } else { Centroid::new(*iter_sorted_values.next().unwrap(), 1) }; let mut weight_so_far: u64 = curr.weight(); let mut sums_to_merge: f64 = 0.0; let mut weights_to_merge: u64 = 0; while iter_centroids.peek().is_some() || iter_sorted_values.peek().is_some() { let next: Centroid = if let Some(c) = iter_centroids.peek() { if iter_sorted_values.peek().is_none() || c.mean() < **iter_sorted_values.peek().unwrap() { iter_centroids.next().unwrap().clone() } else { Centroid::new(*iter_sorted_values.next().unwrap(), 1) } } else { Centroid::new(*iter_sorted_values.next().unwrap(), 1) }; let next_sum: f64 = next.mean() * next.weight() as f64; weight_so_far += next.weight(); if weight_so_far as f64 <= q_limit_times_count { sums_to_merge += next_sum; weights_to_merge += next.weight(); } else { result.sum = OrderedFloat::from( result.sum.into_inner() + curr.add(sums_to_merge, weights_to_merge), ); sums_to_merge = 0.0; weights_to_merge = 0; TDigest::update_bounds_on_overflow(curr.mean, &mut result.min, &mut result.max); compressed.push(curr.clone()); q_limit_times_count = Self::k_to_q(k_limit, self.max_size as f64) * result.count() as f64; k_limit += 1.0; curr = next; } } result.sum = OrderedFloat::from(result.sum.into_inner() + curr.add(sums_to_merge, weights_to_merge)); TDigest::update_bounds_on_overflow(curr.mean, &mut result.min, &mut result.max); compressed.push(curr); compressed.shrink_to_fit(); compressed.sort(); result.centroids = compressed; 
result } fn external_merge(centroids: &mut [Centroid], first: usize, middle: usize, last: usize) { let mut result: Vec = Vec::with_capacity(centroids.len()); let mut i = first; let mut j = middle; while i < middle && j < last { match centroids[i].cmp(¢roids[j]) { Ordering::Less => { result.push(centroids[i].clone()); i += 1; } Ordering::Greater => { result.push(centroids[j].clone()); j += 1; } Ordering::Equal => { result.push(centroids[i].clone()); i += 1; } } } while i < middle { result.push(centroids[i].clone()); i += 1; } while j < last { result.push(centroids[j].clone()); j += 1; } i = first; for centroid in result.into_iter() { centroids[i] = centroid; i += 1; } } // Merge multiple T-Digests pub fn merge_digests(digests: Vec) -> TDigest { let n_centroids: usize = digests.iter().map(|d| d.centroids.len()).sum(); if n_centroids == 0 { return TDigest::default(); } // TODO should this be the smaller of the sizes? let max_size = digests.first().unwrap().max_size; let mut centroids: Vec = Vec::with_capacity(n_centroids); let mut starts: Vec = Vec::with_capacity(digests.len()); let mut count: u64 = 0; let mut min = OrderedFloat::from(f64::INFINITY); let mut max = OrderedFloat::from(f64::NEG_INFINITY); let mut start: usize = 0; for digest in digests.into_iter() { starts.push(start); let curr_count: u64 = digest.count(); if curr_count > 0 { min = std::cmp::min(min, digest.min); max = std::cmp::max(max, digest.max); count += curr_count; for centroid in digest.centroids { centroids.push(centroid); start += 1; } } } let mut digests_per_block: usize = 1; while digests_per_block < starts.len() { for i in (0..starts.len()).step_by(digests_per_block * 2) { if i + digests_per_block < starts.len() { let first = starts[i]; let middle = starts[i + digests_per_block]; let last = if i + 2 * digests_per_block < starts.len() { starts[i + 2 * digests_per_block] } else { centroids.len() }; debug_assert!(first <= middle && middle <= last); Self::external_merge(&mut centroids, first, 
middle, last); } } digests_per_block *= 2; } let mut result = TDigest::new_with_size(max_size); let mut compressed: Vec = Vec::with_capacity(max_size); let mut k_limit: f64 = 1.0; let mut q_limit_times_count: f64 = Self::k_to_q(k_limit, max_size as f64) * (count as f64); let mut iter_centroids = centroids.iter_mut(); let mut curr = iter_centroids.next().unwrap(); let mut weight_so_far: u64 = curr.weight(); let mut sums_to_merge: f64 = 0.0; let mut weights_to_merge: u64 = 0; for centroid in iter_centroids { weight_so_far += centroid.weight(); if weight_so_far as f64 <= q_limit_times_count { sums_to_merge += centroid.mean() * centroid.weight() as f64; weights_to_merge += centroid.weight(); } else { result.sum = OrderedFloat::from( result.sum.into_inner() + curr.add(sums_to_merge, weights_to_merge), ); sums_to_merge = 0.0; weights_to_merge = 0; TDigest::update_bounds_on_overflow(curr.mean, &mut min, &mut max); compressed.push(curr.clone()); q_limit_times_count = Self::k_to_q(k_limit, max_size as f64) * (count as f64); k_limit += 1.0; curr = centroid; } } result.sum = OrderedFloat::from(result.sum.into_inner() + curr.add(sums_to_merge, weights_to_merge)); TDigest::update_bounds_on_overflow(curr.mean, &mut min, &mut max); compressed.push(curr.clone()); compressed.shrink_to_fit(); compressed.sort(); result.count = count; result.min = min; result.max = max; result.centroids = compressed; result } /// Given a value estimate the corresponding quantile in a digest pub fn estimate_quantile_at_value(&self, v: f64) -> f64 { if self.centroids.is_empty() { return 0.0; } if v < self.min.into_inner() { return 0.0; } if v > self.max.into_inner() { return 1.0; } let mut low_bound = self.min.into_inner(); let mut low_weight = 0; let mut hi_bound = self.max.into_inner(); let mut hi_weight = 0; let mut accum_weight = 0; for cent in &self.centroids { if v < cent.mean.into_inner() { hi_bound = cent.mean.into_inner(); hi_weight = cent.weight; break; } low_bound = cent.mean.into_inner(); 
low_weight = cent.weight; accum_weight += low_weight; } let weighted_midpoint = low_bound + (hi_bound - low_bound) * low_weight as f64 / (low_weight + hi_weight) as f64; if v > weighted_midpoint { (accum_weight as f64 + (v - weighted_midpoint) / (hi_bound - weighted_midpoint) * hi_weight as f64 / 2.0) / self.count as f64 } else { (accum_weight as f64 - (weighted_midpoint - v) / (weighted_midpoint - low_bound) * low_weight as f64 / 2.0) / self.count as f64 } } /// To estimate the value located at `q` quantile pub fn estimate_quantile(&self, q: f64) -> f64 { if self.centroids.is_empty() { return 0.0; } let rank: f64 = q * self.count as f64; let mut pos: usize; let mut t: u64; if q > 0.5 { if q >= 1.0 { return self.max(); } pos = 0; t = self.count; for (k, centroid) in self.centroids.iter().enumerate().rev() { t -= centroid.weight(); if rank >= t as f64 { pos = k; break; } } } else { if q <= 0.0 || rank <= 1.0 { return self.min(); } pos = self.centroids.len() - 1; t = 0; for (k, centroid) in self.centroids.iter().enumerate() { if rank < (t + centroid.weight()) as f64 { pos = k; break; } t += centroid.weight(); } } // At this point pos indexes the centroid containing the desired rank and t is the combined weight of all buckets < pos // With this we can determine the location of our target rank within the range covered by centroid 'pos' let centroid_weight = (rank - t as f64) / self.centroids[pos].weight() as f64; // Now we use that location to interpolate the desired value between the centroid mean and the weighted midpoint between the next centroid in the direction of the target rank. 
let diff = centroid_weight - 0.5; return if diff.abs() < f64::EPSILON { self.centroids[pos].mean() } else if diff.is_sign_negative() { let weighted_lower_bound = if pos == 0 { weighted_average( self.min(), 0, self.centroids[pos].mean(), self.centroids[pos].weight(), ) } else { weighted_average( self.centroids[pos - 1].mean(), self.centroids[pos - 1].weight(), self.centroids[pos].mean(), self.centroids[pos].weight(), ) }; interpolate( weighted_lower_bound, self.centroids[pos].mean(), centroid_weight * 2.0, ) } else { let weighted_upper_bound = if pos == self.centroids.len() - 1 { weighted_average( self.centroids[pos].mean(), self.centroids[pos].weight(), self.max(), 0, ) } else { weighted_average( self.centroids[pos].mean(), self.centroids[pos].weight(), self.centroids[pos + 1].mean(), self.centroids[pos + 1].weight(), ) }; interpolate( self.centroids[pos].mean(), weighted_upper_bound, (centroid_weight - 0.5) * 2.0, ) }; // Helper functions for quantile calculation // Given two points and their relative weights, return the weight midpoint (i.e. 
if p2 is twice the weight of p1, the midpoint will be twice as close to p2 as to p1) fn weighted_average(p1: f64, p1_weight: u64, p2: f64, p2_weight: u64) -> f64 { interpolate(p1, p2, p1_weight as f64 / (p1_weight + p2_weight) as f64) } // Given two points and a weight in the range [0,1], return p1 + weight * (p2-p1) fn interpolate(p1: f64, p2: f64, weight: f64) -> f64 { // We always call this with p2 >= p1 and ensuring this reduces the cases we have to match on debug_assert!(OrderedFloat::from(p2) >= OrderedFloat::from(p1)); // Not being able to match on floats makes this match much uglier than it should be match (p1.is_infinite(), p2.is_infinite(), p1.is_sign_positive(), !p2.is_sign_negative()) { (true, true, false, true) /* (f64::NEG_INFINITY, f64::INFINITY) */ => f64::NAN, // This is a stupid case, and the only time we'll see quantile return a NaN (true, _, false, _) /* (f64::NEG_INFINITY, _) */ => f64::NEG_INFINITY, (_, true, _, true) /* (_, f64::INFINITY) */ => f64::INFINITY, _ => p1 + (p2 - p1) * weight } } } } // This is a tdigest object paired // with a vector of values that still need to be inserted. #[derive(Clone, Debug, Default, Deserialize, Serialize)] pub struct Builder { #[serde(skip)] buffer: Vec, digested: TDigest, } impl From for Builder { fn from(digested: TDigest) -> Self { Self { digested, ..Default::default() } } } impl Builder { pub fn with_size(size: usize) -> Self { Self::from(TDigest::new_with_size(size)) } // Add a new value, recalculate the digest if we've crossed a threshold. // TODO threshold is currently set to number of digest buckets, should this be adjusted pub fn push(&mut self, value: f64) { self.buffer.push(value); if self.buffer.len() >= self.digested.max_size() { self.digest() } } // Update the digest with all accumulated values. 
fn digest(&mut self) { if self.buffer.is_empty() { return; } let new = std::mem::take(&mut self.buffer); self.digested = self.digested.merge_unsorted(new) } pub fn build(&mut self) -> TDigest { self.digest(); std::mem::take(&mut self.digested) } pub fn merge(&mut self, other: Self) { assert_eq!(self.digested.max_size(), other.digested.max_size()); let digvec = vec![std::mem::take(&mut self.digested), other.digested]; if !self.buffer.is_empty() { digvec[0].merge_unsorted(std::mem::take(&mut self.buffer)); } if !other.buffer.is_empty() { digvec[1].merge_unsorted(other.buffer); } self.digested = TDigest::merge_digests(digvec); } } #[cfg(test)] mod tests { use super::*; #[test] fn test_centroid_addition_regression() { //https://github.com/MnO2/t-digest/pull/1 let vals = vec![1.0, 1.0, 1.0, 2.0, 1.0, 1.0]; let mut t = TDigest::new_with_size(10); for v in vals { t = t.merge_unsorted(vec![v]); } let ans = t.estimate_quantile(0.5); let expected: f64 = 1.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.95); let expected: f64 = 2.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); } #[test] fn test_merge_sorted_against_uniform_distro() { let t = TDigest::new_with_size(100); let values: Vec = (1..=1_000_000).map(f64::from).collect(); let t = t.merge_sorted(values); let ans = t.estimate_quantile(1.0); let expected: f64 = 1_000_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.99); let expected: f64 = 990_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.01); let expected: f64 = 10_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.0); let expected: f64 = 1.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = 
t.estimate_quantile(0.5); let expected: f64 = 500_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); } #[test] fn test_merge_unsorted_against_uniform_distro() { let t = TDigest::new_with_size(100); let values: Vec = (1..=1_000_000).map(f64::from).collect(); let t = t.merge_unsorted(values); let ans = t.estimate_quantile(1.0); let expected: f64 = 1_000_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.99); let expected: f64 = 990_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.01); let expected: f64 = 10_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.0); let expected: f64 = 1.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.5); let expected: f64 = 500_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); } #[test] fn test_merge_sorted_against_skewed_distro() { let t = TDigest::new_with_size(100); let mut values: Vec = (1..=600_000).map(f64::from).collect(); values.resize(values.len() + 400_000, 1_000_000.0); let t = t.merge_sorted(values); let ans = t.estimate_quantile(0.99); let expected: f64 = 1_000_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.01); let expected: f64 = 10_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.5); let expected: f64 = 500_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); } #[test] fn test_merge_unsorted_against_skewed_distro() { let t = TDigest::new_with_size(100); let mut values: Vec = (1..=600_000).map(f64::from).collect(); values.resize(values.len() + 400_000, 1_000_000.0); 
let t = t.merge_unsorted(values); let ans = t.estimate_quantile(0.99); let expected: f64 = 1_000_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.01); let expected: f64 = 10_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.5); let expected: f64 = 500_000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); } #[test] fn test_merge_digests() { let mut digests: Vec = Vec::new(); for _ in 1..=100 { let t = TDigest::new_with_size(100); let values: Vec = (1..=1_000).map(f64::from).collect(); let t = t.merge_sorted(values); digests.push(t) } let t = TDigest::merge_digests(digests); let ans = t.estimate_quantile(1.0); let expected: f64 = 1000.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.99); let expected: f64 = 990.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.01); let expected: f64 = 10.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.2); let ans = t.estimate_quantile(0.0); let expected: f64 = 1.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); let ans = t.estimate_quantile(0.5); let expected: f64 = 500.0; let percentage: f64 = (expected - ans).abs() / expected; assert!(percentage < 0.01); } #[test] fn test_quantile_and_value_estimates() { let t = TDigest::new_with_size(100); let values: Vec = (1..=10000).map(|v| f64::from(v) / 100.0).collect(); let t = t.merge_sorted(values); for i in 1..=100 { let value = i as f64; let quantile = value / 100.0; let test_value = t.estimate_quantile(quantile); let test_quant = t.estimate_quantile_at_value(value); let percentage = (test_value - value).abs() / value; assert!( percentage < 0.01, "Exceeded 1% error on quantile {}: expected {}, 
received {} (error% {})", quantile, value, test_value, (test_value - value).abs() / value * 100.0 ); let percentage = (test_quant - quantile).abs() / quantile; assert!( percentage < 0.01, "Exceeded 1% error on quantile at value {}: expected {}, received {} (error% {})", value, quantile, test_quant, (test_quant - quantile).abs() / quantile * 100.0 ); let test = t.estimate_quantile_at_value(t.estimate_quantile(quantile)); let percentage = (test - quantile).abs() / quantile; assert!(percentage < 0.001); } } #[test] fn test_buffered_merge() { let mut digested = TDigest::new_with_size(100); let mut buffer = vec![]; for i in 1..=100 { buffer.push(i as f64); if buffer.len() >= digested.max_size() { let new = std::mem::take(&mut buffer); digested = digested.merge_unsorted(new) } } if !buffer.is_empty() { digested = digested.merge_unsorted(buffer) } let estimate = digested.estimate_quantile(0.99); assert_eq!(estimate, 99.5); } use quickcheck::*; #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] struct OrderedF64(OrderedFloat); impl Arbitrary for OrderedF64 { fn arbitrary(g: &mut Gen) -> Self { OrderedF64(f64::arbitrary(g).into()) } } #[quickcheck] fn fuzzing_test( batch1: HashSet, batch2: HashSet, batch3: HashSet, batch4: HashSet, ) -> TestResult { let batch1: Vec = batch1 .into_iter() .map(|x| x.0.into()) .filter(|x: &f64| !x.is_nan()) .collect(); let batch2: Vec = batch2 .into_iter() .map(|x| x.0.into()) .filter(|x: &f64| !x.is_nan()) .collect(); let batch3: Vec = batch3 .into_iter() .map(|x| x.0.into()) .filter(|x: &f64| !x.is_nan()) .collect(); let batch4: Vec = batch4 .into_iter() .map(|x| x.0.into()) .filter(|x: &f64| !x.is_nan()) .collect(); let digest1 = TDigest::new_with_size(20).merge_unsorted(batch1.clone()); let digest1 = digest1.merge_unsorted(batch2.clone()); let digest2 = TDigest::new_with_size(20).merge_unsorted(batch3.clone()); let digest2 = digest2.merge_unsorted(batch4.clone()); let digest = TDigest::merge_digests(vec![digest1, 
digest2]); let quantile_tests = [0.01, 0.1, 0.25, 0.5, 0.6, 0.8, 0.95]; let tolerated_percentile_error = [0.010001, 0.100001, 0.2, 0.30, 0.275, 0.1725, 0.050001]; // .000001 cases are to handle rounding errors on cases that might return infinities let mut master: Vec = batch1 .iter() .chain(batch2.iter()) .chain(batch3.iter()) .chain(batch4.iter()) .copied() .collect(); master.sort_by(|a, b| a.partial_cmp(b).unwrap()); if master.len() < 100 { return TestResult::discard(); } for i in 0..quantile_tests.len() { let quantile = quantile_tests[i]; let error_bound = tolerated_percentile_error[i]; let test_val = digest.estimate_quantile(quantile); let target_idx = quantile * master.len() as f64; let target_allowed_error = master.len() as f64 * error_bound; let mut test_idx = 0; if test_val != f64::INFINITY { while test_idx < master.len() && master[test_idx] < test_val { test_idx += 1; } } else { // inequality checking against infinity is wonky test_idx = master.len(); } // test idx is now the idx of the smallest element >= test_val (and yes, this could be done faster with binary search) assert!((test_idx as f64) >= target_idx - target_allowed_error && (test_idx as f64) <= target_idx + target_allowed_error, "testing {} quantile returned {}, there are {} values lower than this, target range {} +/- {}", quantile, test_val, test_idx, target_idx, target_allowed_error); } TestResult::passed() } } ================================================ FILE: crates/t-digest-lib/Cargo.toml ================================================ [package] name = "tdigest-lib" version = "0.0.0" edition = "2021" [lib] name = "timescaledb_toolkit_tdigest" crate-type = ["cdylib", "staticlib"] [dependencies] libc = "0.2.135" tdigest = { path="../t-digest" } ================================================ FILE: crates/t-digest-lib/src/lib.rs ================================================ // There is no safety here: it's all in the hands of the caller, bless their heart. 
#![allow(clippy::missing_safety_doc)] #[unsafe(no_mangle)] pub extern "C" fn timescaledb_toolkit_tdigest_builder_with_size( size: usize, ) -> Box { Box::new(tdigest::Builder::with_size(size)) } #[unsafe(no_mangle)] pub unsafe extern "C" fn timescaledb_toolkit_tdigest_push( builder: *mut tdigest::Builder, value: f64, ) { (*builder).push(value) } // TODO Don't abort the process if `builder` and `other` weren't created with the same size. #[unsafe(no_mangle)] pub unsafe extern "C" fn timescaledb_toolkit_tdigest_merge( builder: *mut tdigest::Builder, other: Box, ) { let other = *other; (*builder).merge(other) } #[unsafe(no_mangle)] pub extern "C" fn timescaledb_toolkit_tdigest_builder_free(_: Box) {} #[unsafe(no_mangle)] pub extern "C" fn timescaledb_toolkit_tdigest_build( mut builder: Box, ) -> Box { Box::new(builder.build()) } #[unsafe(no_mangle)] pub extern "C" fn timescaledb_toolkit_tdigest_free(_: Box) {} // TODO Messy, but good enough to experiment with. We might want to // into_raw_parts the String and offer a transparent struct containing pointer // to and size of the buffer, with a ts_tk_tdigest_string_free taking it back // and releasing it. That also avoids one copy. 
#[unsafe(no_mangle)] pub unsafe extern "C" fn timescaledb_toolkit_tdigest_format_for_postgres( td: *const tdigest::TDigest, ) -> *mut libc::c_char { let s = (*td).format_for_postgres(); let buf = libc::malloc(s.len() + 1); libc::memcpy(buf, s.as_ptr() as *const libc::c_void, s.len()); let buf = buf as *mut libc::c_char; let r = std::slice::from_raw_parts_mut(buf, s.len() + 1); r[s.len()] = 0; buf } ================================================ FILE: crates/time-weighted-average/Cargo.toml ================================================ [package] name = "time_weighted_average" version = "0.1.0" authors = ["David Kohn "] edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] flat_serialize = {path="../flat_serialize/flat_serialize"} flat_serialize_macro = {path="../flat_serialize/flat_serialize_macro"} serde = { version = "1.0", features = ["derive"] } tspoint = {path="../tspoint"} ================================================ FILE: crates/time-weighted-average/src/lib.rs ================================================ use serde::{Deserialize, Serialize}; use tspoint::TSPoint; use flat_serialize_macro::FlatSerializable; #[derive(Clone, Copy, PartialEq, Eq, Debug, Serialize, Deserialize, FlatSerializable)] #[repr(u8)] pub enum TimeWeightMethod { LOCF = 0, Linear, } #[derive(Clone, Copy, PartialEq, Debug, Serialize, Deserialize)] pub struct TimeWeightSummary { pub method: TimeWeightMethod, pub first: TSPoint, pub last: TSPoint, pub w_sum: f64, } #[derive(PartialEq, Eq, Debug)] pub enum TimeWeightError { OrderError, DoubleOverflow, // do we need to do this? 
MethodMismatch, InterpolateMissingPoint, ZeroDuration, EmptyIterator, } impl TimeWeightSummary { pub fn new(pt: TSPoint, method: TimeWeightMethod) -> Self { TimeWeightSummary { method, first: pt, last: pt, w_sum: 0.0, } } pub fn accum(&mut self, pt: TSPoint) -> Result<(), TimeWeightError> { if pt.ts < self.last.ts { return Err(TimeWeightError::OrderError); } if pt.ts == self.last.ts { // if two points are equal we only use the first we see // see discussion at https://github.com/timescale/timescaledb-toolkit/discussions/65 return Ok(()); } self.w_sum += self.method.weighted_sum(self.last, pt); self.last = pt; Ok(()) } // This combine function is different than some other combine functions as it requires disjoint time ranges in order to work // correctly. The aggregate will never be parallel safe in the Postgres formulation because of this. However in the continuous // aggregate context (and potentially in a multinode context) where we can be sure of disjoint time ranges, this will work. // If there are space partitions, the space partition keys should be included in the group bys in order to be sure of this, otherwise // overlapping ranges will be created. pub fn combine(&self, next: &TimeWeightSummary) -> Result { if self.method != next.method { return Err(TimeWeightError::MethodMismatch); } if self.last.ts >= next.first.ts { // this combine function should always be pulling from disjoint sets, so duplicate values do not need to be handled // as we do in accum() (where duplicates are ignored) here we throw an error, because duplicate values should // always have been sorted into one or another bucket, and it means that the bounds of our buckets were wrong. 
return Err(TimeWeightError::OrderError); } let new = TimeWeightSummary { method: self.method, first: self.first, last: next.last, w_sum: self.w_sum + next.w_sum + self.method.weighted_sum(self.last, next.first), }; Ok(new) } pub fn new_from_sorted_iter<'a>( iter: impl IntoIterator, method: TimeWeightMethod, ) -> Result { let mut t = iter.into_iter(); let mut s = match t.next() { None => { return Err(TimeWeightError::EmptyIterator); } Some(val) => TimeWeightSummary::new(*val, method), }; for p in t { s.accum(*p)?; } Ok(s) } pub fn combine_sorted_iter<'a>( iter: impl IntoIterator, ) -> Result { let mut t = iter.into_iter(); let mut s = match t.next() { None => { return Err(TimeWeightError::EmptyIterator); } Some(val) => *val, }; for p in t { s = s.combine(p)?; } Ok(s) } /// Extrapolate a TimeWeightSummary to bounds using the method and provided points outside the bounds of the original summary. /// This is especially useful for cases where you want to get an average for, say, a time_bucket, using points outside of that time_bucket. /// The initial aggregate will only have points within the time bucket, but outside of it, you will either have a point that you select /// or a TimeWeightSummary where the first or last point can be used depending on which bound you are extrapolating to. /// 1. The start_prev parameter is optional, but if a start is provided a previous point must be /// provided (for both linear and locf weighting methods). /// 2. The end_next parameter is also optional, if an end is provided and the locf weighting /// method is specified, a next parameter isn't needed, with the linear method, the next /// point is needed and we will error if it is not provided. pub fn with_bounds( &self, start_prev: Option<(i64, TSPoint)>, end_next: Option<(i64, Option)>, ) -> Result { let mut calc = *self; if let Some((start, prev)) = start_prev { calc = self.with_prev(start, prev)? } if let Some((end, next)) = end_next { calc = self.with_next(end, next)? 
} Ok(calc) } fn with_prev(&self, target_start: i64, prev: TSPoint) -> Result { // target_start must always be between [prev.ts, self.first.ts] if prev.ts >= self.first.ts || target_start > self.first.ts || prev.ts > target_start { return Err(TimeWeightError::OrderError); // should this be a different error? } if target_start == self.first.ts { return Ok(*self); } let new_first = self .method .interpolate(prev, Some(self.first), target_start)?; let w_sum = self.w_sum + self.method.weighted_sum(new_first, self.first); Ok(TimeWeightSummary { first: new_first, w_sum, ..*self }) } fn with_next(&self, target_end: i64, next: Option) -> Result { if target_end < self.last.ts { // equal is okay, will just reduce to zero add in the sum, but not an error return Err(TimeWeightError::OrderError); } // if our target matches last, there's no work to do, we're already there. if target_end == self.last.ts { return Ok(*self); } if let Some(next) = next { if next.ts < target_end { return Err(TimeWeightError::OrderError); } } let new_last = self.method.interpolate(self.last, next, target_end)?; let w_sum = self.w_sum + self.method.weighted_sum(self.last, new_last); Ok(TimeWeightSummary { last: new_last, w_sum, ..*self }) } ///Evaluate the time_weighted_average from the summary. pub fn time_weighted_average(&self) -> Result { if self.last.ts == self.first.ts { return Err(TimeWeightError::ZeroDuration); } let duration = (self.last.ts - self.first.ts) as f64; Ok(self.w_sum / duration) } /// Evaluate the integral in microseconds. 
pub fn time_weighted_integral(&self) -> f64 { if self.last.ts == self.first.ts { // the integral of a duration of zero width is zero 0.0 } else { self.w_sum } } } impl TimeWeightMethod { pub fn interpolate( &self, first: TSPoint, second: Option, target: i64, ) -> Result { if let Some(second) = second { if second.ts <= first.ts { return Err(TimeWeightError::OrderError); } } let pt = TSPoint { ts: target, val: match (self, second) { (TimeWeightMethod::LOCF, _) => first.val, // TODO make this a method on TimeWeightMethod? (TimeWeightMethod::Linear, Some(second)) => { first.interpolate_linear(&second, target).unwrap() } (TimeWeightMethod::Linear, None) => { return Err(TimeWeightError::InterpolateMissingPoint) } }, }; Ok(pt) } pub fn weighted_sum(&self, first: TSPoint, second: TSPoint) -> f64 { debug_assert!(second.ts > first.ts); let duration = (second.ts - first.ts) as f64; match self { TimeWeightMethod::LOCF => first.val * duration, //the weighting for a linear interpolation is equivalent to the midpoint //between the two values, this is because we're taking the area under the //curve, which is the sum of the smaller of the two values multiplied by //duration (a rectangle) + the triangle formed on top (abs diff between the //two / 2 * duration) this is equivalent to the rectangle formed by the //midpoint of the two. //TODO: Stable midpoint calc? http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0811r2.html TimeWeightMethod::Linear => (first.val + second.val) / 2.0 * duration, } } } #[cfg(test)] mod tests { use crate::*; // Just creating and basic use works // Simple case gets correct results Done // errors for each of with_prev/with_next, // other error conditions: // weird cases: // NaN/Inf inputs -> should these error? // Overflow? 
-> Inf // #[test] fn test_simple_accum_locf() { let mut s = TimeWeightSummary::new(TSPoint { ts: 0, val: 1.0 }, TimeWeightMethod::LOCF); assert_eq!(s.w_sum, 0.0); s.accum(TSPoint { ts: 10, val: 0.0 }).unwrap(); assert_eq!(s.w_sum, 10.0); s.accum(TSPoint { ts: 20, val: 2.0 }).unwrap(); assert_eq!(s.w_sum, 10.0); s.accum(TSPoint { ts: 30, val: 1.0 }).unwrap(); assert_eq!(s.w_sum, 30.0); s.accum(TSPoint { ts: 40, val: -3.0 }).unwrap(); assert_eq!(s.w_sum, 40.0); s.accum(TSPoint { ts: 50, val: -3.0 }).unwrap(); assert_eq!(s.w_sum, 10.0); } #[test] fn test_simple_accum_linear() { let mut s = TimeWeightSummary::new(TSPoint { ts: 0, val: 1.0 }, TimeWeightMethod::Linear); assert_eq!(s.w_sum, 0.0); s.accum(TSPoint { ts: 10, val: 0.0 }).unwrap(); assert_eq!(s.w_sum, 5.0); s.accum(TSPoint { ts: 20, val: 2.0 }).unwrap(); assert_eq!(s.w_sum, 15.0); s.accum(TSPoint { ts: 30, val: 1.0 }).unwrap(); assert_eq!(s.w_sum, 30.0); s.accum(TSPoint { ts: 40, val: -3.0 }).unwrap(); assert_eq!(s.w_sum, 20.0); s.accum(TSPoint { ts: 50, val: -3.0 }).unwrap(); assert_eq!(s.w_sum, -10.0); } fn new_from_sorted_iter_test(t: TimeWeightMethod) { // simple test let mut s = TimeWeightSummary::new(TSPoint { ts: 0, val: 1.0 }, t); s.accum(TSPoint { ts: 10, val: 0.0 }).unwrap(); s.accum(TSPoint { ts: 20, val: 2.0 }).unwrap(); s.accum(TSPoint { ts: 30, val: 1.0 }).unwrap(); let n = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 1.0 }, ], t, ) .unwrap(); assert_eq!(s, n); //single value let s = TimeWeightSummary::new(TSPoint { ts: 0, val: 1.0 }, t); let n = TimeWeightSummary::new_from_sorted_iter(vec![&TSPoint { ts: 0, val: 1.0 }], t).unwrap(); assert_eq!(s, n); //no values should error let n = TimeWeightSummary::new_from_sorted_iter(vec![], t); assert_eq!(n, Err(TimeWeightError::EmptyIterator)); } #[test] fn test_new_from_sorted_iter() { new_from_sorted_iter_test(TimeWeightMethod::LOCF); 
new_from_sorted_iter_test(TimeWeightMethod::Linear); } fn combine_test(t: TimeWeightMethod) { let s = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 1.0 }, ], t, ) .unwrap(); let s1 = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }], t, ) .unwrap(); let s2 = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 1.0 }], t, ) .unwrap(); let s_comb = s1.combine(&s2).unwrap(); assert_eq!(s, s_comb); // test combine with single val as well as multiple let s21 = TimeWeightSummary::new(TSPoint { ts: 20, val: 2.0 }, t); let s22 = TimeWeightSummary::new(TSPoint { ts: 30, val: 1.0 }, t); assert_eq!(s1.combine(&s21).unwrap().combine(&s22).unwrap(), s); } #[test] fn test_combine() { combine_test(TimeWeightMethod::LOCF); combine_test(TimeWeightMethod::Linear); } fn order_accum_test(t: TimeWeightMethod) { let s = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }], t, ) .unwrap(); let mut o = s; // adding points at the same timestamp shouldn't affect the value (no matter whether the // value is larger or smaller than the original) o.accum(TSPoint { ts: 10, val: 2.0 }).unwrap(); assert_eq!(s, o); o.accum(TSPoint { ts: 10, val: -1.0 }).unwrap(); assert_eq!(s, o); //but adding out of order points doesn't work assert_eq!( o.accum(TSPoint { ts: 5, val: -1.0 }), Err(TimeWeightError::OrderError) ); //same for new_from_sorted_iter - test that multiple values only the first is taken let n = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 4.0 }, ], t, ) .unwrap(); let m = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 20, val: 0.0 }, &TSPoint { ts: 30, val: 4.0 
}, ], t, ) .unwrap(); assert_eq!(m, n); // but out of order inputs correctly error let n = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 10, val: 0.0 }, ], t, ); assert_eq!(n, Err(TimeWeightError::OrderError)); } #[test] fn test_order_accum() { order_accum_test(TimeWeightMethod::LOCF); order_accum_test(TimeWeightMethod::Linear); } fn order_combine_test(t: TimeWeightMethod) { let s = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }], t, ) .unwrap(); let smaller = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 5, val: 1.0 }, &TSPoint { ts: 15, val: 0.0 }], t, ) .unwrap(); // see note above, but let equal = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 15, val: 0.0 }], t, ) .unwrap(); assert_eq!(s.combine(&smaller), Err(TimeWeightError::OrderError)); assert_eq!(s.combine(&equal), Err(TimeWeightError::OrderError)); } #[test] fn test_order_combine() { order_combine_test(TimeWeightMethod::LOCF); order_combine_test(TimeWeightMethod::Linear); } fn combine_sorted_iter_test(t: TimeWeightMethod) { //simple case let m = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 0.0 }, &TSPoint { ts: 40, val: 4.0 }, ], t, ) .unwrap(); let a = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }], t, ) .unwrap(); let b = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 30, val: 0.0 }, &TSPoint { ts: 40, val: 4.0 }], t, ) .unwrap(); let n = TimeWeightSummary::combine_sorted_iter(vec![&a, &b]).unwrap(); assert_eq!(m, n); //single values are no problem let n = TimeWeightSummary::combine_sorted_iter(vec![&m]).unwrap(); assert_eq!(m, n); //single values in TimeWeightSummaries are no problem let c = TimeWeightSummary::new(TSPoint { ts: 0, val: 1.0 }, t); let 
d = TimeWeightSummary::new(TSPoint { ts: 20, val: 2.0 }, t); let n = TimeWeightSummary::combine_sorted_iter(vec![&c, &d, &b]).unwrap(); assert_eq!(m, n); // whether single values come first or later let e = TimeWeightSummary::new(TSPoint { ts: 30, val: 0.0 }, t); let f = TimeWeightSummary::new(TSPoint { ts: 40, val: 4.0 }, t); let n = TimeWeightSummary::combine_sorted_iter(vec![&a, &e, &f]).unwrap(); assert_eq!(m, n); // empty iterators error assert_eq!( TimeWeightSummary::combine_sorted_iter(vec![]), Err(TimeWeightError::EmptyIterator) ); // out of order values error let n = TimeWeightSummary::combine_sorted_iter(vec![&c, &d, &f, &e]); assert_eq!(n, Err(TimeWeightError::OrderError)); // even with two values let n = TimeWeightSummary::combine_sorted_iter(vec![&b, &a]); assert_eq!(n, Err(TimeWeightError::OrderError)); } #[test] fn test_combine_sorted_iter() { combine_sorted_iter_test(TimeWeightMethod::LOCF); combine_sorted_iter_test(TimeWeightMethod::Linear); } #[test] fn test_mismatch_combine() { let s1 = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }], TimeWeightMethod::LOCF, ) .unwrap(); let s2 = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 1.0 }], TimeWeightMethod::Linear, ) .unwrap(); assert_eq!(s1.combine(&s2), Err(TimeWeightError::MethodMismatch)); let s1 = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 0, val: 1.0 }, &TSPoint { ts: 10, val: 0.0 }], TimeWeightMethod::Linear, ) .unwrap(); let s2 = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 1.0 }], TimeWeightMethod::LOCF, ) .unwrap(); assert_eq!(s1.combine(&s2), Err(TimeWeightError::MethodMismatch)); } #[test] fn test_weighted_sum() { let pt1 = TSPoint { ts: 10, val: 20.0 }; let pt2 = TSPoint { ts: 20, val: 40.0 }; let locf = TimeWeightMethod::LOCF.weighted_sum(pt1, pt2); assert_eq!(locf, 200.0); let linear = 
TimeWeightMethod::Linear.weighted_sum(pt1, pt2); assert_eq!(linear, 300.0); let pt2 = TSPoint { ts: 20, val: -40.0 }; let locf = TimeWeightMethod::LOCF.weighted_sum(pt1, pt2); assert_eq!(locf, 200.0); let linear = TimeWeightMethod::Linear.weighted_sum(pt1, pt2); assert_eq!(linear, -100.0); } fn with_prev_common_test(t: TimeWeightMethod) { let test = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }], t, ) .unwrap(); // target = starting point should produce itself no matter the method let prev = TSPoint { ts: 5, val: 5.0 }; let target: i64 = 10; assert_eq!(test.with_prev(target, prev).unwrap(), test); // target = prev should always produce the same as if we made a new one with prev as the starting point, no matter the extrapolation method, though technically, this shouldn't come up in real world data, because you'd never target a place you had real data for, but that's fine, it's a useful reductive case for testing let prev = TSPoint { ts: 5, val: 5.0 }; let target: i64 = 5; let expected = TimeWeightSummary::new_from_sorted_iter( vec![ &prev, &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }, ], t, ) .unwrap(); assert_eq!(test.with_prev(target, prev).unwrap(), expected); // prev >= first should produce an order error let prev = TSPoint { ts: 10, val: 5.0 }; let target: i64 = 10; assert_eq!( test.with_prev(target, prev).unwrap_err(), TimeWeightError::OrderError ); // target okay, but prev not less than it let prev = TSPoint { ts: 5, val: 5.0 }; let target: i64 = 2; assert_eq!( test.with_prev(target, prev).unwrap_err(), TimeWeightError::OrderError ); // prev okay, but target > start let prev = TSPoint { ts: 5, val: 5.0 }; let target: i64 = 15; assert_eq!( test.with_prev(target, prev).unwrap_err(), TimeWeightError::OrderError ); } #[test] fn test_with_prev() { // adding a previous point is the same as a TimeWeightSummary constructed from the properly extrapolated previous point and the original let 
test = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }], TimeWeightMethod::LOCF, ) .unwrap(); let prev = TSPoint { ts: 0, val: 5.0 }; let target: i64 = 5; let expected_origin = TSPoint { ts: 5, val: 5.0 }; let expected = TimeWeightSummary::new_from_sorted_iter( vec![ &expected_origin, &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }, ], TimeWeightMethod::LOCF, ) .unwrap(); assert_eq!(test.with_prev(target, prev).unwrap(), expected); // if the Summary uses a linear method, the extrapolation should be linear as well let test = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }], TimeWeightMethod::Linear, ) .unwrap(); let prev = TSPoint { ts: 0, val: 5.0 }; let target: i64 = 5; let expected_origin = TSPoint { ts: 5, val: 3.0 }; let expected = TimeWeightSummary::new_from_sorted_iter( vec![ &expected_origin, &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }, ], TimeWeightMethod::Linear, ) .unwrap(); assert_eq!(test.with_prev(target, prev).unwrap(), expected); // now some common tests: with_prev_common_test(TimeWeightMethod::Linear); with_prev_common_test(TimeWeightMethod::LOCF); } fn with_next_common_test(t: TimeWeightMethod) { let test = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }], t, ) .unwrap(); // target = end point should produce itself no matter the method let next = TSPoint { ts: 25, val: 5.0 }; let target: i64 = 20; assert_eq!(test.with_next(target, Some(next)).unwrap(), test); // target = next should always produce the same as if we added the next point for linear, and will produce the same w_sum, though not the same final point for LOCF, here' we'll test the w_sum. 
Though technically, this shouldn't come up in real world data, because you'd never target a place you had real data for, but that's fine, it's a useful reductive case for testing let next = TSPoint { ts: 25, val: 5.0 }; let target: i64 = 25; let expected = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 0.0 }, &next, ], t, ) .unwrap(); assert_eq!( test.with_next(target, Some(next)).unwrap().w_sum, expected.w_sum ); // next <= last should produce an order error let next = TSPoint { ts: 20, val: 5.0 }; let target: i64 = 22; assert_eq!( test.with_next(target, Some(next)).unwrap_err(), TimeWeightError::OrderError ); // target okay, but next not greater than it let next = TSPoint { ts: 22, val: 5.0 }; let target: i64 = 25; assert_eq!( test.with_next(target, Some(next)).unwrap_err(), TimeWeightError::OrderError ); // next okay, but target < last let next = TSPoint { ts: 25, val: 5.0 }; let target: i64 = 15; assert_eq!( test.with_next(target, Some(next)).unwrap_err(), TimeWeightError::OrderError ); } #[test] fn test_with_next() { // adding a target_next point is the same as a TimeWeightSummary constructed from the properly extrapolated next point and the original let test = TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }], TimeWeightMethod::LOCF, ) .unwrap(); let next = TSPoint { ts: 30, val: 3.0 }; let target: i64 = 25; let expected_next = TSPoint { ts: 25, val: 2.0 }; let expected = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &expected_next, ], TimeWeightMethod::LOCF, ) .unwrap(); assert_eq!(test.with_next(target, Some(next)).unwrap(), expected); // For LOCF it doesn't matter if next is provided, only the target is required assert_eq!(test.with_next(target, None).unwrap(), expected); // if the Summary uses a linear method, the extrapolation should be linear as well let test = 
TimeWeightSummary::new_from_sorted_iter( vec![&TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }], TimeWeightMethod::Linear, ) .unwrap(); let next = TSPoint { ts: 30, val: 3.0 }; let target: i64 = 25; let expected_next = TSPoint { ts: 25, val: 2.5 }; let expected = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &expected_next, ], TimeWeightMethod::Linear, ) .unwrap(); assert_eq!(test.with_next(target, Some(next)).unwrap(), expected); // For Linear method, we need the second point, and not providing a next will error: assert_eq!( test.with_next(target, None).unwrap_err(), TimeWeightError::InterpolateMissingPoint ); // now some common tests: with_next_common_test(TimeWeightMethod::Linear); with_next_common_test(TimeWeightMethod::LOCF); } // add average tests fn average_common_tests(t: TimeWeightMethod) { let single = TimeWeightSummary::new(TSPoint { ts: 20, val: 2.0 }, t); assert_eq!( single.time_weighted_average().unwrap_err(), TimeWeightError::ZeroDuration ); } #[test] fn test_average() { average_common_tests(TimeWeightMethod::Linear); average_common_tests(TimeWeightMethod::LOCF); let test = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 3.0 }, ], TimeWeightMethod::LOCF, ) .unwrap(); let expected = (10.0 * 1.0 + 10.0 * 2.0) / (30.0 - 10.0); assert_eq!(test.time_weighted_average().unwrap(), expected); let test = TimeWeightSummary::new_from_sorted_iter( vec![ &TSPoint { ts: 10, val: 1.0 }, &TSPoint { ts: 20, val: 2.0 }, &TSPoint { ts: 30, val: 3.0 }, ], TimeWeightMethod::Linear, ) .unwrap(); let expected = (10.0 * 1.5 + 10.0 * 2.5) / (30.0 - 10.0); assert_eq!(test.time_weighted_average().unwrap(), expected); } } ================================================ FILE: crates/tspoint/Cargo.toml ================================================ [package] name = "tspoint" version = "0.1.0" edition = "2021" # See more 
keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] flat_serialize = {path="../flat_serialize/flat_serialize"} flat_serialize_macro = {path="../flat_serialize/flat_serialize_macro"} serde = { version = "1.0", features = ["derive"] } ================================================ FILE: crates/tspoint/src/lib.rs ================================================ use serde::{ser::SerializeStruct, Deserialize, Serialize}; use flat_serialize_macro::FlatSerializable; use std::ffi::CStr; #[derive(Clone, Copy, PartialEq, Debug, FlatSerializable)] #[repr(C)] pub struct TSPoint { pub ts: i64, pub val: f64, } #[derive(Debug, PartialEq, Eq)] pub enum TSPointError { TimesEqualInterpolate, } impl TSPoint { pub fn interpolate_linear(&self, p2: &TSPoint, ts: i64) -> Result { if self.ts == p2.ts { return Err(TSPointError::TimesEqualInterpolate); } // using point slope form of a line iteratively y = y2 - y1 / (x2 - x1) * (x - x1) + y1 let duration = (p2.ts - self.ts) as f64; // x2 - x1 let dinterp = (ts - self.ts) as f64; // x - x1 Ok((p2.val - self.val) * dinterp / duration + self.val) } } impl Serialize for TSPoint { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { if serializer.is_human_readable() { // FIXME ugly hack to use postgres functions in an non-postgres library unsafe extern "C" { fn _ts_toolkit_encode_timestamptz(dt: i64, buf: &mut [u8; 128]); } let mut ts = [0; 128]; unsafe { _ts_toolkit_encode_timestamptz(self.ts, &mut ts); } let end = ts.iter().position(|c| *c == 0).unwrap(); let ts = CStr::from_bytes_with_nul(&ts[..end + 1]).unwrap(); let ts = ts.to_str().unwrap(); let mut point = serializer.serialize_struct("TSPoint", 2)?; point.serialize_field("ts", &ts)?; point.serialize_field("val", &self.val)?; point.end() } else { let mut point = serializer.serialize_struct("TSPoint", 2)?; point.serialize_field("ts", &self.ts)?; point.serialize_field("val", &self.val)?; point.end() } } } impl<'de> 
Deserialize<'de> for TSPoint { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { use serde::de::{self, MapAccess, SeqAccess, Visitor}; use std::fmt; struct TsPointVisitor { text_timestamp: bool, } // FIXME ugly hack to use postgres functions in an non-postgres library unsafe extern "C" { // this is only going to be used to communicate with a rust lib we compile with this one #[allow(improper_ctypes)] fn _ts_toolkit_decode_timestamptz(text: &str) -> i64; } impl<'de> Visitor<'de> for TsPointVisitor { type Value = TSPoint; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("struct TSPoint") } fn visit_seq(self, mut seq: V) -> Result where V: SeqAccess<'de>, { let ts = if self.text_timestamp { let text: &str = seq .next_element()? .ok_or_else(|| de::Error::invalid_length(0, &self))?; unsafe { _ts_toolkit_decode_timestamptz(text) } } else { seq.next_element()? .ok_or_else(|| de::Error::invalid_length(0, &self))? }; let val = seq .next_element()? .ok_or_else(|| de::Error::invalid_length(1, &self))?; Ok(TSPoint { ts, val }) } fn visit_map(self, mut map: V) -> Result where V: MapAccess<'de>, { #[derive(Deserialize)] #[serde(field_identifier, rename_all = "lowercase")] enum Field { Ts, Val, } let mut ts = None; let mut val = None; while let Some(key) = map.next_key()? { match key { Field::Ts => { if ts.is_some() { return Err(de::Error::duplicate_field("ts")); } ts = if self.text_timestamp { let text: &str = map.next_value()?; unsafe { Some(_ts_toolkit_decode_timestamptz(text)) } } else { Some(map.next_value()?) 
}; } Field::Val => { if val.is_some() { return Err(de::Error::duplicate_field("val")); } val = Some(map.next_value()?); } } } let ts = ts.ok_or_else(|| de::Error::missing_field("ts"))?; let val = val.ok_or_else(|| de::Error::missing_field("val"))?; Ok(TSPoint { ts, val }) } } const FIELDS: &[&str] = &["ts", "val"]; let visitor = TsPointVisitor { text_timestamp: deserializer.is_human_readable(), }; deserializer.deserialize_struct("TSPoint", FIELDS, visitor) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_linear_interpolate() { let p1 = TSPoint { ts: 1, val: 1.0 }; let p2 = TSPoint { ts: 3, val: 3.0 }; assert_eq!(p1.interpolate_linear(&p2, 2).unwrap(), 2.0); assert_eq!(p1.interpolate_linear(&p2, 3).unwrap(), 3.0); assert_eq!(p1.interpolate_linear(&p2, 4).unwrap(), 4.0); assert_eq!(p1.interpolate_linear(&p2, 0).unwrap(), 0.0); assert_eq!( p1.interpolate_linear(&p1, 2).unwrap_err(), TSPointError::TimesEqualInterpolate ); } } ================================================ FILE: crates/udd-sketch/Cargo.toml ================================================ [package] name = "uddsketch" version = "0.1.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] serde = { version = "1.0", features = ["derive"] } [dev-dependencies] ordered-float = {version = "1.0", features = ["serde"] } rand = "0.8.3" quickcheck = "1" quickcheck_macros = "1" ================================================ FILE: crates/udd-sketch/src/lib.rs ================================================ //! UDDSketch implementation in rust. //! 
Based on the paper: https://arxiv.org/abs/2004.08604 use serde::{Deserialize, Serialize}; use std::collections::hash_map::Entry; use std::collections::HashMap; use crate::SketchHashKey::Invalid; #[cfg(test)] use ordered_float::OrderedFloat; #[cfg(test)] use std::collections::HashSet; use std::num::NonZeroU32; #[cfg(test)] extern crate quickcheck; #[cfg(test)] #[macro_use(quickcheck)] extern crate quickcheck_macros; // This is used to index the buckets of the UddSketch. In particular, because UddSketch stores values // based on a logarithmic scale, we need to track negative values separately from positive values, and // zero also needs special casing. #[derive(Serialize, Deserialize, Hash, PartialEq, Eq, Copy, Clone, Debug)] pub enum SketchHashKey { Negative(i64), Zero, Positive(i64), Invalid, } // Invalid is treated as greater than valid values (making it a nice boundary value for list end) impl std::cmp::Ord for SketchHashKey { fn cmp(&self, other: &Self) -> std::cmp::Ordering { use self::SketchHashKey::*; use std::cmp::Ordering::*; match (self, other) { (Positive(a), Positive(b)) => a.cmp(b), (Negative(a), Negative(b)) => a.cmp(b).reverse(), (Positive(_), Negative(_) | Zero) => Greater, (Negative(_) | Zero, Positive(_)) => Less, (Zero, Negative(_)) => Greater, (Negative(_), Zero) => Less, (Zero, Zero) => Equal, (Invalid, Invalid) => Equal, (Invalid, Negative(_) | Zero | Positive(_)) => Greater, (Negative(_) | Zero | Positive(_), Invalid) => Less, } } } impl std::cmp::PartialOrd for SketchHashKey { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } /// `UDDSketchMetadata` was created to avoid passing along many parameters /// to function calls. pub struct UDDSketchMetadata { pub max_buckets: u32, pub current_error: f64, pub compactions: u8, pub values: u64, pub sum: f64, pub buckets: u32, } impl SketchHashKey { /// This is the key corresponding to the current key after the SketchHashMap it refers to has gone through one compaction. 
/// Note that odd buckets get combined with the bucket after them (i.e. old buckets -3 and -2 become new bucket -1, {-1, 0} -> 0, {1, 2} -> 1) fn compact_key(&self) -> SketchHashKey { use SketchHashKey::*; match *self { Negative(i64::MAX) => *self, // Infinite buckets remain infinite Positive(i64::MAX) => *self, Negative(x) => Negative(if x > 0 { x + 1 } else { x } / 2), Positive(x) => Positive(if x > 0 { x + 1 } else { x } / 2), x => x, // Zero and Invalid don't compact } } } // Entries in the SketchHashMap contain a count and the next valid index of the map. #[derive(Debug, Clone, PartialEq)] struct SketchHashEntry { count: u64, next: SketchHashKey, } // SketchHashMap is a special hash map of SketchHashKey->count that also keeps the equivalent of a linked list of the entries by increasing key value. #[derive(Debug, Clone, PartialEq)] struct SketchHashMap { map: HashMap, head: SketchHashKey, } impl std::ops::Index for SketchHashMap { type Output = u64; fn index(&self, id: SketchHashKey) -> &Self::Output { &self.map[&id].count } } // Iterator for a SketchHashMap will travel through the map in order of increasing key value and return the (key, count) pairs #[derive(Clone)] pub struct SketchHashIterator<'a> { container: &'a SketchHashMap, next_key: SketchHashKey, } impl Iterator for SketchHashIterator<'_> { type Item = (SketchHashKey, u64); fn next(&mut self) -> Option<(SketchHashKey, u64)> { if self.next_key == SketchHashKey::Invalid { None } else { let key = self.next_key; self.next_key = self.container.map[&self.next_key].next; Some((key, self.container[key])) } } } impl SketchHashMap { fn new() -> SketchHashMap { SketchHashMap { map: HashMap::new(), head: SketchHashKey::Invalid, } } fn with_capacity(capacity: usize) -> SketchHashMap { SketchHashMap { map: HashMap::with_capacity(capacity), head: SketchHashKey::Invalid, } } /// Increment the count at a key, creating the entry if needed. 
fn increment(&mut self, key: SketchHashKey) { self.entry_upsert(key, 1); } fn iter(&self) -> SketchHashIterator<'_> { SketchHashIterator { container: self, next_key: self.head, } } /// Splits an entry if `key` is supposed to come right after it /// Returns the key *after* the one that was split. #[inline] #[must_use] // The caller should really do something with this information. fn entry_split(&mut self, key: SketchHashKey) -> SketchHashKey { debug_assert_ne!( key, SketchHashKey::Invalid, "Invalid should never be used as a key into the SketchHashMap" ); let next: SketchHashKey; // Special case, if we're actually in front of the Head, // we're not really splitting the linked list, but prepending. if key < self.head { next = self.head; self.head = key; return next; } // Unfortunately, we'll now have to walk the whole map in order // to find the location where we should be inserted // into the single-linked list for (k, e) in self.map.iter_mut() { if *k < key && e.next > key { next = e.next; e.next = key; return next; } } unreachable!("Invalid key found"); } /// Upsert the given key/count into our map. This function /// ensures the Linked List is in good shape afterwards. #[inline] fn entry_upsert(&mut self, key: SketchHashKey, count: u64) { match self.map.entry(key) { Entry::Occupied(mut o) => { o.get_mut().count += count; // Great, we don't have to update the Linked List return; } Entry::Vacant(v) if self.head > key => { v.insert(SketchHashEntry { count, next: self.head, }); self.head = key; // Great, we don't have to update the Linked List return; } Entry::Vacant(_) => (), // We need to release our &mut map here, as we need to update 2 entries }; // We've just inserted a new value, but need to ensure we fix the linked list again. let new_next = self.entry_split(key); self.map.insert( key, SketchHashEntry { count, next: new_next, }, ); } fn len(&self) -> usize { self.map.len() } /// Combine adjacent buckets using the stack. 
fn compact_using_stack(&mut self) { let len = self.map.len(); debug_assert!(len <= N); let mut entries = [(SketchHashKey::Invalid, 0); N]; let mut drain = self.map.drain(); for e in entries.iter_mut() { if let Some((key, entry)) = drain.next() { *e = (key.compact_key(), entry.count); } else { break; } } drop(drain); self.populate_map_using_iter(&mut entries[0..len]) } /// This function will populate the backing map using the provided slice. /// It will sort and aggregate, so the caller does not need to take care /// of that. /// However, this should really only be called to populate the empty map. fn populate_map_using_iter(&mut self, entries: &mut [(SketchHashKey, u64)]) { assert!( self.map.is_empty(), "SketchHashMap should be empty when populating using a slice" ); if entries.is_empty() { return; } // To build up the linked list, we can do so by calling `entry_upsert` for every call // to the `HashMap`. `entry_upsert` however needs to walk the map though to figure // out where to place a key, therefore, we switch to: // - sort // - aggregate // - insert // That's what we do here // - sort entries.sort_unstable_by_key(|e| e.0); // - aggregate let mut old_index = 0; let mut current = entries[0]; for idx in 1..entries.len() { let next = entries[idx]; if next.0 == current.0 { current.1 += next.1; } else { entries[old_index] = current; current = next; old_index += 1; } } // Final one entries[old_index] = current; // We should only return the slice containing the aggregated values let iter = entries.iter_mut().take(old_index + 1).peekable(); let mut iter = iter.peekable(); self.head = iter.peek().map(|p| p.0).unwrap_or(Invalid); // - insert while let Some((key, count)) = iter.next() { self.map.insert( *key, SketchHashEntry { count: *count, next: iter.peek().map(|p| p.0).unwrap_or(Invalid), }, ); } } #[inline] fn compact(&mut self) { match self.len() { 0 => (), // PERCENTILE_AGG_DEFAULT_SIZE defaults to 200, so // this entry covers that case. 
1..=200 => self.compact_using_stack::<200>(), 201..=1000 => self.compact_using_stack::<1000>(), 1001..=5000 => self.compact_using_stack::<5000>(), _ => self.compact_using_heap(), } } // Combine adjacent buckets fn compact_using_heap(&mut self) { let mut entries = Vec::with_capacity(self.map.len()); // By draining the `HashMap`, we can reuse the same piece of memory after we're done. // We're only using the `Vec` for a very short-lived period of time. entries.extend(self.map.drain().map(|e| (e.0.compact_key(), e.1.count))); self.populate_map_using_iter(&mut entries) } } #[derive(Clone, Debug, PartialEq)] pub struct UDDSketch { buckets: SketchHashMap, alpha: f64, gamma: f64, compactions: u8, // should always be smaller than 64 max_buckets: NonZeroU32, num_values: u64, values_sum: f64, } impl UDDSketch { pub fn new(max_buckets: u32, initial_error: f64) -> Self { assert!((1e-12..1.0).contains(&initial_error)); UDDSketch { buckets: SketchHashMap::new(), alpha: initial_error, gamma: (1.0 + initial_error) / (1.0 - initial_error), compactions: 0, max_buckets: NonZeroU32::new(max_buckets) .expect("max buckets should be greater than zero"), num_values: 0, values_sum: 0.0, } } // This constructor is used to recreate a UddSketch from its component data pub fn new_from_data( metadata: &UDDSketchMetadata, keys: impl Iterator, mut counts: impl Iterator, ) -> Self { let capacity = metadata.buckets as usize; let mut sketch = UDDSketch { buckets: SketchHashMap::with_capacity(capacity), alpha: metadata.current_error, gamma: gamma(metadata.current_error), compactions: metadata.compactions, max_buckets: NonZeroU32::new(metadata.max_buckets) .expect("max buckets should be greater than zero"), num_values: metadata.values, values_sum: metadata.sum, }; let mut keys = keys.into_iter().peekable(); if let Some(key) = keys.peek() { sketch.buckets.head = *key; } // This assumes the keys are unique and sorted while let (Some(key), Some(count)) = (keys.next(), counts.next()) { let next = 
keys.peek().copied().unwrap_or(Invalid); sketch .buckets .map .insert(key, SketchHashEntry { next, count }); } debug_assert_eq!(sketch.buckets.map.len(), capacity); sketch } } impl UDDSketch { // For a given value return the index of it's bucket in the current sketch. fn key(&self, value: f64) -> SketchHashKey { key(value, self.gamma) } pub fn compact_buckets(&mut self) { self.buckets.compact(); self.compactions += 1; self.gamma *= self.gamma; // See https://arxiv.org/pdf/2004.08604.pdf Equation 3 self.alpha = 2.0 * self.alpha / (1.0 + self.alpha.powi(2)); // See https://arxiv.org/pdf/2004.08604.pdf Equation 4 } pub fn bucket_iter(&self) -> SketchHashIterator<'_> { self.buckets.iter() } } impl UDDSketch { pub fn add_value(&mut self, value: f64) { self.buckets.increment(self.key(value)); while self.buckets.len() > self.max_buckets.get() as usize { self.compact_buckets(); } self.num_values += 1; self.values_sum += value; } /// `merge_items` will merge these values into the current sketch /// it requires less memory than `merge_sketch`, as that needs a fully serialized /// `UDDSketch`, whereas this function relies on iterators to do its job. 
pub fn merge_items( &mut self, other: &UDDSketchMetadata, mut keys: impl Iterator, mut counts: impl Iterator, ) { let other_gamma = gamma(other.current_error); // Require matching initial parameters debug_assert!( (self .gamma .powf(1.0 / f64::powi(2.0, self.compactions as i32)) - other_gamma.powf(1.0 / f64::powi(2.0, other.compactions as i32))) .abs() < 1e-9 // f64::EPSILON too small, see issue #396 ); debug_assert_eq!(self.max_buckets.get(), other.max_buckets); if other.values == 0 { return; } while self.compactions < other.compactions { self.compact_buckets(); } let extra_compactions = self.compactions - other.compactions; while let (Some(mut key), Some(count)) = (keys.next(), counts.next()) { for _ in 0..extra_compactions { key = key.compact_key(); } self.buckets.entry_upsert(key, count); } while self.buckets.len() > self.max_buckets.get() as usize { self.compact_buckets(); } self.num_values += other.values; self.values_sum += other.sum; } pub fn merge_sketch(&mut self, other: &UDDSketch) { // Require matching initial parameters assert!( (self .gamma .powf(1.0 / f64::powi(2.0, self.compactions as i32)) - other .gamma .powf(1.0 / f64::powi(2.0, other.compactions as i32))) .abs() < 1e-9 // f64::EPSILON too small, see issue #396 ); assert!(self.max_buckets == other.max_buckets); if other.num_values == 0 { return; } if self.num_values == 0 { *self = other.clone(); return; } let mut other = other.clone(); while self.compactions > other.compactions { other.compact_buckets(); } while other.compactions > self.compactions { self.compact_buckets(); } for entry in other.buckets.iter() { let (key, value) = entry; self.buckets.entry_upsert(key, value); } while self.buckets.len() > self.max_buckets.get() as usize { self.compact_buckets(); } self.num_values += other.num_values; self.values_sum += other.values_sum; } pub fn max_allowed_buckets(&self) -> u32 { self.max_buckets.get() } pub fn times_compacted(&self) -> u8 { self.compactions } pub fn current_buckets_count(&self) 
-> usize { self.buckets.map.len() } } impl UDDSketch { #[inline] pub fn mean(&self) -> f64 { if self.num_values == 0 { 0.0 } else { self.values_sum / self.num_values as f64 } } #[inline] pub fn sum(&self) -> f64 { self.values_sum } #[inline] pub fn count(&self) -> u64 { self.num_values } #[inline] pub fn max_error(&self) -> f64 { self.alpha } pub fn estimate_quantile(&self, quantile: f64) -> f64 { estimate_quantile( quantile, self.alpha, self.gamma, self.num_values, self.buckets.iter(), ) } pub fn estimate_quantile_at_value(&self, value: f64) -> f64 { estimate_quantile_at_value(value, self.gamma, self.num_values, self.buckets.iter()) } } pub fn estimate_quantile( quantile: f64, alpha: f64, gamma: f64, num_values: u64, buckets: impl Iterator, ) -> f64 { assert!((0.0..=1.0).contains(&quantile)); let mut remaining = (num_values as f64 * quantile) as u64 + 1; if remaining >= num_values { return last_bucket_value(alpha, gamma, buckets); } for entry in buckets { let (key, count) = entry; if remaining <= count { return bucket_to_value(alpha, gamma, key); } else { remaining -= count; } } unreachable!(); } // Look up the value of the last bucket // This is not an efficient operation fn last_bucket_value( alpha: f64, gamma: f64, buckets: impl Iterator, ) -> f64 { let (key, _) = buckets.last().unwrap(); bucket_to_value(alpha, gamma, key) } /// inverse of `key()` within alpha fn bucket_to_value(alpha: f64, gamma: f64, bucket: SketchHashKey) -> f64 { // When taking gamma ^ i below we have to use powf as powi only takes a u32, and i can exceed 2^32 for small alphas match bucket { SketchHashKey::Zero => 0.0, SketchHashKey::Positive(i) => gamma.powf(i as f64 - 1.0) * (1.0 + alpha), SketchHashKey::Negative(i) => -(gamma.powf(i as f64 - 1.0) * (1.0 + alpha)), SketchHashKey::Invalid => panic!("Unable to convert invalid bucket id to value"), } } pub fn estimate_quantile_at_value( value: f64, gamma: f64, num_values: u64, buckets: impl Iterator, ) -> f64 { let mut count = 0.0; let 
target = key(value, gamma); for entry in buckets { let (key, value) = entry; if target > key { count += value as f64; } else { if target == key { // If the value falls in the target bucket, assume it's greater than half the other values count += value as f64 / 2.0; } return count / num_values as f64; } } 1.0 // Greater than anything in the sketch } fn key(value: f64, gamma: f64) -> SketchHashKey { let negative = value < 0.0; let value = value.abs(); if value == 0.0 { SketchHashKey::Zero } else if negative { SketchHashKey::Negative(value.log(gamma).ceil() as i64) } else { SketchHashKey::Positive(value.log(gamma).ceil() as i64) } } pub fn gamma(alpha: f64) -> f64 { (1.0 + alpha) / (1.0 - alpha) } #[cfg(test)] mod tests { use rand::{Rng, SeedableRng}; use super::*; #[test] fn build_and_add_values() { let mut sketch = UDDSketch::new(20, 0.1); sketch.add_value(1.0); sketch.add_value(3.0); sketch.add_value(0.5); assert_eq!(sketch.count(), 3); assert_eq!(sketch.mean(), 1.5); assert_eq!(sketch.max_error(), 0.1); } #[test] fn exceed_buckets() { let mut sketch = UDDSketch::new(20, 0.1); sketch.add_value(1.1); // Bucket #1 sketch.add_value(400.0); // Bucket #30 let a2 = 0.2 / 1.01; assert_eq!(sketch.count(), 2); assert_eq!(sketch.max_error(), 0.1); for i in 2..20 { sketch.add_value(1000.0 * 1.23_f64.powi(i)); } assert_eq!(sketch.count(), 20); assert_eq!(sketch.max_error(), 0.1); for i in 20..30 { sketch.add_value(1000.0 * 1.23_f64.powi(i)); } assert_eq!(sketch.count(), 30); assert_eq!(sketch.max_error(), a2); } /// We create this `merge_verifier` so that every test we run also tests /// the multiple implementations we have for merging sketches. /// It is a drop-in replacement for `merge_sketches`, with additional asserts. 
fn merge_verifier(sketch: &mut UDDSketch, other: &UDDSketch) { let mut second = sketch.clone(); sketch.merge_sketch(other); let mut keys = Vec::with_capacity(other.num_values as usize); let mut counts = Vec::with_capacity(other.num_values as usize); for (key, count) in other.buckets.iter() { keys.push(key); counts.push(count); } let metadata = UDDSketchMetadata { max_buckets: other.max_buckets.get(), current_error: other.alpha, compactions: other.compactions, values: other.num_values, sum: other.values_sum, buckets: other.buckets.map.len() as u32, }; second.merge_items(&metadata, keys.into_iter(), counts.into_iter()); // Both methods should result in the same end result. assert_eq!(*sketch, second); } #[test] fn merge_sketches() { let a1 = 0.1; // alpha for up to 20 buckets let a2 = 0.2 / 1.01; // alpha for 1 compaction let a3 = 2.0 * a2 / (1.0 + f64::powi(a2, 2)); // alpha for 2 compactions let a4 = 2.0 * a3 / (1.0 + f64::powi(a3, 2)); // alpha for 3 compactions let a5 = 2.0 * a4 / (1.0 + f64::powi(a4, 2)); // alpha for 4 compactions let mut sketch1 = UDDSketch::new(20, 0.1); sketch1.add_value(1.1); // Bucket #1 sketch1.add_value(1.5); // Bucket #3 sketch1.add_value(1.6); // Bucket #3 sketch1.add_value(1.3); // Bucket #2 sketch1.add_value(4.2); // Bucket #8 assert_eq!(sketch1.count(), 5); assert_eq!(sketch1.max_error(), a1); let mut sketch2 = UDDSketch::new(20, 0.1); sketch2.add_value(5.1); // Bucket #9 sketch2.add_value(7.5); // Bucket #11 sketch2.add_value(10.6); // Bucket #12 sketch2.add_value(9.3); // Bucket #12 sketch2.add_value(11.2); // Bucket #13 assert_eq!(sketch2.max_error(), a1); merge_verifier(&mut sketch1, &sketch2); assert_eq!(sketch1.count(), 10); assert_eq!(sketch1.max_error(), a1); let mut sketch3 = UDDSketch::new(20, 0.1); sketch3.add_value(0.8); // Bucket #-1 sketch3.add_value(3.7); // Bucket #7 sketch3.add_value(15.2); // Bucket #14 sketch3.add_value(3.4); // Bucket #7 sketch3.add_value(0.6); // Bucket #-2 assert_eq!(sketch3.max_error(), a1); 
merge_verifier(&mut sketch1, &sketch3); assert_eq!(sketch1.count(), 15); assert_eq!(sketch1.max_error(), a1); let mut sketch4 = UDDSketch::new(20, 0.1); sketch4.add_value(400.0); // Bucket #30 sketch4.add_value(0.004); // Bucket #-27 sketch4.add_value(0.0); // Zero Bucket sketch4.add_value(-400.0); // Neg. Bucket #30 sketch4.add_value(-0.004); // Neg. Bucket #-27 sketch4.add_value(400000000000.0); // Some arbitrary large bucket sketch4.add_value(0.00000005); // Some arbitrary small bucket sketch4.add_value(-400000000000.0); // Some arbitrary large neg. bucket sketch4.add_value(-0.00000005); // Some arbitrary small neg. bucket assert_eq!(sketch4.max_error(), a1); merge_verifier(&mut sketch1, &sketch4); assert_eq!(sketch1.count(), 24); assert_eq!(sketch1.max_error(), a2); let mut sketch5 = UDDSketch::new(20, 0.1); for i in 100..220 { sketch5.add_value(1.23_f64.powi(i)); } assert_eq!(sketch5.max_error(), a4); merge_verifier(&mut sketch1, &sketch5); assert_eq!(sketch1.count(), 144); assert_eq!(sketch1.max_error(), a5); // Note that each compaction doesn't always result in half the numbers of buckets, hence a5 here instead of a4 } #[test] fn test_quantile_and_value_estimates() { let mut sketch = UDDSketch::new(50, 0.1); for v in 1..=10000 { sketch.add_value(v as f64 / 100.0); } assert_eq!(sketch.count(), 10000); assert_eq!(sketch.max_error(), 0.1); for i in 1..=100 { let value = i as f64; let quantile = value / 100.0; let quantile_value = value + 0.01; // correct value for quantile should be next number > value let test_value = sketch.estimate_quantile(quantile); let test_quant = sketch.estimate_quantile_at_value(value); let percentage = (test_value - quantile_value).abs() / quantile_value; assert!( percentage <= 0.1, "Exceeded 10% error on quantile {}: expected {}, received {} (error% {})", quantile, quantile_value, test_value, (test_value - quantile_value).abs() / quantile_value ); let percentage = (test_quant - quantile).abs() / quantile; assert!( percentage < 0.2, 
"Exceeded 20% error on quantile at value {}: expected {}, received {} (error% {})", value, quantile, test_quant, (test_quant - quantile).abs() / quantile ); } assert!((sketch.mean() - 50.005).abs() < 0.001); } #[test] fn test_extreme_quantile_at_value() { let mut sketch = UDDSketch::new(50, 0.1); for v in 1..=10000 { sketch.add_value(v as f64 / 100.0); } assert_eq!(sketch.estimate_quantile_at_value(-100.0), 0.0); assert_eq!(sketch.estimate_quantile_at_value(0.0), 0.0); assert_eq!(sketch.estimate_quantile_at_value(0.0001), 0.0); assert_eq!(sketch.estimate_quantile_at_value(1000.0), 1.0); assert!(sketch.estimate_quantile_at_value(0.01) < 0.0001); assert!(sketch.estimate_quantile_at_value(100.0) > 0.9); } #[test] fn random_stress() { let mut sketch = UDDSketch::new(1000, 0.01); let seed = rand::thread_rng().gen_range(0..u64::MAX); let mut rng = rand::rngs::StdRng::seed_from_u64(seed); let mut bounds = Vec::new(); for _ in 0..100 { let v = rng.gen_range(-1000000.0..1000000.0); sketch.add_value(v); bounds.push(v); } bounds.sort_by(|a, b| a.partial_cmp(b).unwrap()); let mut prev = -2000000.0; for f in bounds.iter() { for _ in 0..10000 { sketch.add_value(rng.gen_range(prev..*f)); } prev = *f; } for i in 0..100 { assert!(((sketch.estimate_quantile((i as f64 + 1.0) / 100.0) / bounds[i]) - 1.0).abs() < sketch.max_error() * bounds[i].abs(), "Failed to correct match {} quantile with seed {}. 
Received: {}, Expected: {}, Error: {}, Expected error bound: {}", (i as f64 + 1.0) / 100.0, seed, sketch.estimate_quantile((i as f64 + 1.0) / 100.0), bounds[i], ((sketch.estimate_quantile((i as f64 + 1.0) / 100.0) / bounds[i]) - 1.0).abs() / bounds[i].abs(), sketch.max_error()); } } use crate::SketchHashKey::Invalid; use quickcheck::*; #[derive(Hash, PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug)] struct OrderedF64(OrderedFloat); impl Arbitrary for OrderedF64 { fn arbitrary(g: &mut Gen) -> Self { OrderedF64(f64::arbitrary(g).into()) } } #[test] #[should_panic] fn test_entry_invalid_hashmap_key() { let mut map = SketchHashMap { map: HashMap::new(), head: Invalid, }; map.entry_upsert(Invalid, 0); } #[test] fn test_entry_insertion_order() { let mut map = SketchHashMap { map: HashMap::new(), head: Invalid, }; map.entry_upsert(SketchHashKey::Negative(i64::MIN), 5); map.entry_upsert(SketchHashKey::Negative(10), 1); map.entry_upsert(SketchHashKey::Positive(i64::MAX - 100), 17); map.entry_upsert(SketchHashKey::Zero, 7); map.entry_upsert(SketchHashKey::Positive(-10), 11); map.entry_upsert(SketchHashKey::Negative(-10), 3); map.entry_upsert(SketchHashKey::Positive(10), 13); let keys: Vec<_> = map.iter().collect::>(); assert_eq!( keys, vec![ (SketchHashKey::Negative(10), 1), (SketchHashKey::Negative(-10), 3), (SketchHashKey::Negative(i64::MIN), 5), (SketchHashKey::Zero, 7), (SketchHashKey::Positive(-10), 11), (SketchHashKey::Positive(10), 13), (SketchHashKey::Positive(i64::MAX - 100), 17), ] ); // We add some things before the current head, insert some new ones, // add some to the end, and again inbetween some others map.entry_upsert(SketchHashKey::Negative(i64::MAX), 3); map.entry_upsert(SketchHashKey::Negative(-10), 23); map.entry_upsert(SketchHashKey::Positive(9), 29); map.entry_upsert(SketchHashKey::Positive(i64::MAX), 8); map.entry_upsert(SketchHashKey::Positive(10), 123); map.entry_upsert(SketchHashKey::Positive(11), 31); let keys: Vec<_> = map.iter().collect::>(); 
assert_eq!( keys, vec![ (SketchHashKey::Negative(i64::MAX), 3), (SketchHashKey::Negative(10), 1), (SketchHashKey::Negative(-10), 26), // 3 + 23 (SketchHashKey::Negative(i64::MIN), 5), (SketchHashKey::Zero, 7), (SketchHashKey::Positive(-10), 11), (SketchHashKey::Positive(9), 29), (SketchHashKey::Positive(10), 136), // 13 + 123 (SketchHashKey::Positive(11), 31), (SketchHashKey::Positive(i64::MAX - 100), 17), (SketchHashKey::Positive(i64::MAX), 8), ] ); } #[quickcheck] // Use multiple hashsets as input to allow a small number of duplicate values without getting ridiculous levels of duplication (as quickcheck is inclined to create) fn fuzzing_test( batch1: HashSet, batch2: HashSet, batch3: HashSet, batch4: HashSet, ) -> TestResult { let mut master: Vec = batch1 .into_iter() .chain(batch2.into_iter()) .chain(batch3.into_iter()) .chain(batch4.into_iter()) .map(|x| x.0.into()) .filter(|x: &f64| !x.is_nan()) .collect(); if master.len() < 100 { return TestResult::discard(); } let mut sketch = UDDSketch::new(100, 0.000001); for value in &master { sketch.add_value(*value); } let quantile_tests = [0.01, 0.1, 0.25, 0.5, 0.6, 0.8, 0.95]; master.sort_by(|a, b| a.partial_cmp(b).unwrap()); for i in 0..quantile_tests.len() { let quantile = quantile_tests[i]; let mut test_val = sketch.estimate_quantile(quantile); // If test_val is infinite, use the most extreme finite value to test relative error if test_val.is_infinite() { if test_val.is_sign_negative() { test_val = f64::MIN; } else { test_val = f64::MAX; } } // Compute target quantile using nearest rank definition let master_idx = (quantile * master.len() as f64).floor() as usize; let target = master[master_idx]; if target.is_infinite() { continue; // trivially correct...or NaN, depending how you define it } let error = if target == 0.0 { test_val } else { (test_val - target).abs() / target.abs() }; assert!(error <= sketch.max_error(), "sketch with error {} estimated {} quantile as {}, true value is {} resulting in relative error 
{} values: {:?}", sketch.max_error(), quantile, test_val, target, error, master); } TestResult::passed() } } ================================================ FILE: docker/README.md ================================================ # Docker images To speed up builds, we are using a set of pre-build docker images and the Docker files for that is present in this directory. ## Pre-requisites You need to have Docker installed with support for DockerKit multi-platform and activate it by setting environment variable `DOCKER_BUILDKIT=1`. ```bash apt-get install docker.io ``` ## Building multi-platform images To build a new Docker image `toolkit-builder` for multiple platforms and push it to the development repository: ```bash ARCH=amd64 OS_NAME=debian OS_VERSION=11 OS_CODE_NAME=bullseye DOCKER_BUILDKIT=1 docker build \ --platform $ARCH \ --build-arg ARCH=$ARCH \ --build-arg OS_NAME=$OS_NAME \ --build-arg OS_VERSION=$OS_VERSION \ --build-arg OS_CODE_NAME=$OS_CODE_NAME \ -f docker/ci/Dockerfile \ -t timescaledev/toolkit-builder-test:$OS_NAME-$OS_VERSION-$ARCH \ . ``` We publish the images as `timescaledev/toolkit-builder` instead of `timescaledev/toolkit-builder-test` after testing. ## Troubleshooting If you get the following error when pushing: ``` $ docker buildx build --platform linux/arm64/v8,linux/amd64 --tag timescaledev/toolkit-builder-test:latest --push . [+] Building 487.0s (54/54) FINISHED => [internal] load .dockerignore 0.0s => => transferring context: 2B 0.0s . . . => [auth] timescaledev/toolkit-builder-test:pull,push token for registry-1.docker.io 0.0s ------ > exporting to image: ------ error: failed to solve: failed to fetch oauth token: Post "https://auth.docker.io/token": x509: certificate has expired or is not yet valid: current time 2022-07-28T07:19:52+01:00 is after 2018-04-29T13:06:19Z ``` You may have better luck with buildx instead of BuildKit. 
Install from https://github.com/docker/buildx and then: ```bash export DOCKER_BUILDKIT=0 docker buildx build --platform linux/arm64/v8,linux/amd64 --tag timescaledev/toolkit-builder-test:latest --push . ``` ================================================ FILE: docker/ci/Dockerfile ================================================ ARG ARCH ARG OS_NAME ARG OS_VERSION # Without DockerKit, this doesn't work, even though documentation suggests it should. # With DockerKit, TARGETARCH is supposed to come in for free, but that doesn't work either. FROM --platform=${ARCH} ${OS_NAME}:${OS_VERSION} AS toolkit-base ARG ARCH ARG OS_CODE_NAME # Docker requires we redeclare these after FROM ¯\_(ツ)_/¯ ARG OS_NAME ARG OS_VERSION ENV HOME=/home/postgres # Docker fails to set LOGNAME :( ENV LOGNAME=root ENV CARGO_HOME=/usr/local/cargo ENV RUSTUP_HOME=/usr/local/rustup ENV PATH="${CARGO_HOME}/bin:/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin" COPY docker/ci/setup.sh / COPY tools/dependencies.sh / # TODO simple option processing a la build and testbin would make this less error-prone RUN /setup.sh ${ARCH} ${OS_NAME} ${OS_VERSION} "${OS_CODE_NAME}" postgres ${HOME} # TODO What does this 'AS' do? It doesn't seem to name it. We need -t for that. FROM toolkit-base AS toolkit-builder WORKDIR ${HOME} # Leave USER root for Github Actions. ================================================ FILE: docker/ci/setup.sh ================================================ #!/bin/sh # TODO rename to tools/setup - this is useful even for developer setup (add Mac/brew support) set -ex if [ -z "$CARGO_HOME" ] || [ -z "$RUSTUP_HOME" ]; then echo >&2 'CARGO_HOME and RUSTUP_HOME environment variables must be set' exit 3 fi if [ "$1" = -unprivileged ]; then privileged=false shift else privileged=true fi if [ $# -ne 6 ]; then echo >&2 'usage: setup.sh ARCH OS_NAME OS_VERSION OS_CODE_NAME BUILDER_USERNAME BUILDER_HOME' exit 2 fi ARCH=$1 OS_NAME=$2 OS_VERSION=$3 OS_CODE_NAME=$4 BUILDER_USERNAME=$5 BUILDER_HOME=$6 . 
/dependencies.sh # Phase 0 - set platform-specific parameters case $OS_NAME in rockylinux) PG_BASE=/usr/pgsql- ;; debian | ubuntu) PG_BASE=/usr/lib/postgresql/ ;; *) echo >&2 "unsupported $OS_NAME" exit 4 ;; esac if $privileged; then # Phase 1 - cross-platform prerequisites useradd -u 1001 -md "$BUILDER_HOME" $BUILDER_USERNAME # Phase 2 - platform-specific package installation case $OS_NAME in # Red Hat Enterprise derivatives rockylinux) case $OS_VERSION in 8) yum -qy install dnf-plugins-core dnf config-manager --enable powertools dnf -qy module disable postgresql # fpm suddenly requires newer public_suffix that requires newer ruby # https://github.com/jordansissel/fpm/issues/1923 ¯\_(ツ)_/¯ dnf -qy module enable ruby:3.1 dnf -qy install ruby-devel rubygems ;; 9) yum -qy install dnf-plugins-core dnf config-manager --enable crb dnf -qy install ruby-devel rubygems ;; *) echo >&2 'only 7 - 9 supported' exit 5 ;; esac # pgrx needs: # - gcc (specifically; clang won't do!) # - openssl-devel # - make # - pkg-config yum -q -y install \ gcc \ git \ make \ openssl-devel \ pkg-config \ rpm-build \ sudo # Setup the postgresql.org package repository. yum -q -y install https://download.postgresql.org/pub/repos/yum/reporpms/EL-${OS_VERSION}-${ARCH}/pgdg-redhat-repo-latest.noarch.rpm # Setup the timescaledb package repository. cat > /etc/yum.repos.d/timescale_timescaledb.repo < /etc/apt/sources.list.d/timescaledb.list < /etc/environment "PATH=$PATH" ;; esac # Phase 3 - cross-platform privileged tasks after package installation # We've benefitted from being able to test expansions to our cargo # installation without having to rebuild the CI image before, so # donate the cargo installation to the builder user. install -d -o $BUILDER_USERNAME "$CARGO_HOME" "$RUSTUP_HOME" # We'll run tools/testbin as this user, and it needs to (un)install packages. 
echo "$BUILDER_USERNAME ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers cd "$BUILDER_HOME" exec sudo -H --preserve-env=PATH,CARGO_HOME,RUSTUP_HOME -u $BUILDER_USERNAME "$0" -unprivileged "$@" fi # Phase 4 - unprivileged cross-platform tasks curl -s https://sh.rustup.rs | sh -s -- -q -y --no-modify-path --default-toolchain $RUST_TOOLCHAIN --profile $RUST_PROFILE -c $RUST_COMPONENTS # Install pgrx cargo install cargo-pgrx --version =$PGRX_VERSION # Configure pgrx ## `cargo pgrx init` is not additive; must specify all versions in one command. for pg_config in $(find /usr -name 'pg_config' -type f | grep "${PG_BASE}"); do pg="$(${pg_config} --version | awk -F '[ .]' '{print $2'})" init_flags="$init_flags --pg$pg ${pg_config}" done cargo pgrx init $init_flags ## Initialize pgrx-managed databases so we can add the timescaledb load, but only ## for those PostgreSQL versions that have the timescaledb.so library available. for pg_config in $(find /usr -name 'pg_config' -type f | grep "${PG_BASE}"); do pg="$(${pg_config} --version | awk -F '[ .]' '{print $2'})" lib="$(find "${PG_BASE}${pg}" -type f -name 'timescaledb.so')" if [ "${lib}" != "" ]; then echo "shared_preload_libraries = 'timescaledb'" >> ~/.pgrx/data-$pg/postgresql.conf fi done # Clone and fetch dependencies so we builds have less work to do. git clone https://github.com/timescale/timescaledb-toolkit cd timescaledb-toolkit cargo fetch ================================================ FILE: docs/README.md ================================================ # TimescaleDB Toolkit Documentation --- The TimescaleDB Toolkit project contains a number of utilities for working with time-series data. This documentation is further broken down by utility or feature in the list [below](#toolkit-features). ## A note on tags Functionality within the TimescaleDB Toolkit repository is intended to be introduced in varying stages of completeness. 
To clarify which releases a given feature or function can be found in, the following tags are used: - **Experimental** - Denotes functionality that is still under very active development and may have poor performance, not handle corner cases or errors, etc. Experimental APIs will change across releases, and extension-update will drop database objects that depend on experimental features. Do not use them unless you're willing to deal with the object you've created (the view, table, continuous aggregates, function, etc.) being dropped on update. This is particularly important for managed cloud services (like Timescale Cloud) that automate upgrades. Experimental features and functions can be found exclusively in the `toolkit_experimental` schema. - **Stable** ***release id*** - Functionality in this state should be correct and performant. Stable APIs will be found in our releases and should not be broken in future releases. Note that this tag will also be accompanied with the version in which the feature was originally released, such as: Feature Foostable-1.2. - **Deprecated** - It may be necessary to remove stable functionality at some point, for instance if it is being supplanted by newer functionality or if it has deprecated dependencies. Functionality with this tag is expected to be removed in future releases and current users of it should move to alternatives. Note that tags can be applied at either a feature or function scope. The function tag takes precedence, but defaults to the feature scope if not present. For example, if we have a feature `Foo` which is tagged `stable`, we would assume that an untagged function `FooCount` within that feature would be present in the current beta release. However, if function `FooSum` were explicitly tagged `experimental` then we would only expect to find it in the nightly build. ## Features The following links lead to pages for the different features in the TimescaleDB Toolkit repository. 
- [ASAP Smoothing](asap.md) [experimental](/docs/README.md#tag-notes) - A data smoothing algorithm designed to generate human readable graphs which maintain any erratic data behavior while smoothing away the cyclic noise. - [Hyperloglog](hyperloglog.md) [experimental](/docs/README.md#tag-notes) – An approximate `COUNT DISTINCT` based on hashing that provides reasonable accuracy in constant space. ([Methods](hyperloglog.md#hyperloglog_api)) - [LTTB](lttb.md) [experimental](/docs/README.md#tag-notes) – A downsample method that preserves visual similarity. ([Methods](lttb.md#api)) - [Percentile Approximation](percentile_approximation.md) - A simple percentile approximation interface [([Methods](percentile_approximation.md#api))], wraps and simplifies the lower level algorithms: - [T-Digest](tdigest.md) – A quantile estimate sketch optimized to provide more accurate estimates near the tails (i.e. 0.001 or 0.995) than conventional approaches. ([Methods](tdigest#tdigest_api)) - [UddSketch](uddsketch.md) – A quantile estimate sketch which provides a guaranteed maximum relative error. ([Methods](uddsketch.md#uddsketch_api)) ================================================ FILE: docs/asap.md ================================================ # ASAP Smoothing [experimental](/docs/README.md#tag-notes) > [Description](#asap-description)
> [Details](#asap-details)<br>
> [Example](#asap-example)<br>
> [API](#asap-api) ## Description The [ASAP smoothing algorithm](https://arxiv.org/pdf/1703.00983.pdf) is designed create human readable graphs which preserve the rough shape and larger trends of the input data while minimizing the local variance between points. TimescaleDB Toolkit provides an implementation of this which will take `(timestamp, value)` pairs, normalize them to the target interval, and return the ASAP smoothed values. ## Details Timescale's ASAP smoothing is implemented as a PostgresQL aggregate over a series of timestamps and values, with an additional target resolution used to control the output size. The implementation will take the incoming data and attempt to bucket the points into even sized buckets such the number of buckets approximates the target resolution and each bucket contains a similar number of points (if necessary, gaps will be filled by interpolating the buckets on either side at this point). It will then attempt to identify good candidate intervals for smoothing the data (using the Wiener-Khinchin theorem to find periods of high autocorrelation), and then choose the candidate that produces the smoothest graph while having the same degree of outlier values. The output of the postgres aggregate is a timescale timevector object describing the start and step interval times and listing the values. This can be passed to our `unnest` API to produce a table of time, value points. The aggreates are also currently not partializeable or combinable. ## Usage Example In this example we're going to examine about 250 years of monthly temperature readings from England (raw data can be found [here](http://futuredata.stanford.edu/asap/Temp.csv), though timestamps need to have a day added to be readable by PostgresQL). 
```SQL ,ignore CREATE TABLE temperatures(month TIMESTAMPTZ, value DOUBLE PRECISION); COPY temperatures from 'temperature.csv' CSV HEADER; SELECT * FROM temperatures ORDER BY month LIMIT 10; ``` ``` month | value ------------------------------+------- 1723-01-01 00:00:00-07:52:58 | 1.1 1723-02-01 00:00:00-07:52:58 | 4.4 1723-03-01 00:00:00-07:52:58 | 7.5 1723-04-01 00:00:00-07:52:58 | 8.9 1723-05-01 00:00:00-07:52:58 | 11.7 1723-06-01 00:00:00-07:52:58 | 15 1723-07-01 00:00:00-07:52:58 | 15.3 1723-08-01 00:00:00-07:52:58 | 15.6 1723-09-01 00:00:00-07:52:58 | 13.3 1723-10-01 00:00:00-07:52:58 | 11.1 (10 rows) ``` It is hard to look at this data and make much sense of how the temperature has changed over that time. Here is a graph of the raw data: ![Raw data](images/ASAP_raw.png) We can use ASAP smoothing here to get a much clearer picture of the behavior over this interval. ```SQL ,ignore SELECT * FROM unnest((SELECT asap_smooth(month, value, 800) FROM temperatures)); ``` ``` time | value -------------------------------------+------------------- 1723-01-01 00:00:00-07:52:58 | 9.51550387596899 1723-04-12 21:38:55.135135-07:52:58 | 9.4890503875969 1723-07-23 19:17:50.27027-07:52:58 | 9.41656976744186 1723-11-02 16:56:45.405405-07:52:58 | 9.429360465116277 1724-02-12 14:35:40.54054-07:52:58 | 9.473546511627905 1724-05-24 12:14:35.675675-07:52:58 | 9.439341085271316 1724-09-03 09:53:30.81081-07:52:58 | 9.409496124031007 1724-12-14 07:32:25.945945-07:52:58 | 9.435465116279067 1725-03-26 05:11:21.08108-07:52:58 | 9.44864341085271 1725-07-06 02:50:16.216215-07:52:58 | 9.43003875968992 1725-10-16 00:29:11.35135-07:52:58 | 9.423062015503874 1726-01-25 22:08:06.486485-07:52:58 | 9.47771317829457 1726-05-07 19:47:01.62162-07:52:58 | 9.515310077519377 1726-08-17 17:25:56.756755-07:52:58 | 9.47383720930232 ... ``` Note the use of the `unnest` here to unpack the results of the `asap_smooth` command. 
The output of this command is ~800 points of smoothed data (in this case it ended up being 888 points each representing a rolling moving average of about 21.5 years). We can view a graph of these values to get a much clearer picture of how the temperature has fluctuated over this time: ![Smoothed data](images/ASAP_smoothed.png) ## Command List (A-Z) > - [asap_smooth](#asap_smooth) --- ## **asap_smooth** ```SQL ,ignore asap_smooth( ts TIMESTAMPTZ, value DOUBLE PRECISION, resolution INT ) RETURNS NormalizedTimevector ``` This normalizes time, value pairs over a given interval and returns a smoothed representation of those points. ### Required Arguments |Name| Type |Description| |---|---|---| | `ts` | `TIMESTAMPTZ` | Column of timestamps corresponding to the values to aggregate | | `value` | `DOUBLE PRECISION` | Column to aggregate. | | `resolution` | `INT` | Approximate number of points to return. Intended to represent the horizontal resolution in which the aggregate will be graphed |
### Returns |Column|Type|Description| |---|---|---| | `normalizedtimevector` | `NormalizedTimevector` | An object representing a series of values occurring at set intervals from a starting time. It can be unpacked via `unnest` |
### Sample Usages For this example, assume we have a table 'metrics' with columns 'date' and 'reading' which contains some interesting measurement we've accumulated over a large interval. The following example would take that data and give us a smoothed representation of approximately 8 points which would still show any anomalous readings: ```SQL SELECT time, round(value::numeric, 14) FROM unnest( (SELECT asap_smooth(date, reading, 8) FROM metrics)); ``` ```output time | value ------------------------------+------------------- 2020-01-01 01:00:00+00 | 5.18067120121489 2020-01-02 00:51:25.714285+00 | 5.60453762172858 2020-01-03 00:42:51.42857+00 | 5.67427410239845 2020-01-04 00:34:17.142855+00 | 5.34902995864025 2020-01-05 00:25:42.85714+00 | 4.81932879878511 2020-01-06 00:17:08.571425+00 | 4.39546237827141 2020-01-07 00:08:34.28571+00 | 4.32572589760154 2020-01-07 23:59:59.999995+00 | 4.65097004135974 ``` ================================================ FILE: docs/client.md ================================================ # Client-side aggregation [experimental](/docs/README.md#tag-notes) - Current status: prototype - Effort remaining: lots ## Purpose We have long suspected it might be valuable to allow building aggregates client-side rather than requiring all data be stored in postgres and aggregated within the toolkit. https://github.com/timescale/timescaledb-toolkit/issues/485 recently came in adding weight to this idea. Because this customer requests tdigest, that's what we'll use for prototyping. ## Use cases Quoting the above customer: "In some cases it is not possible to transfer all the non-aggregated data to TimescaleDB due to it's amount and/or limited connectivity." ## Questions - Do we want to support a public crate? - What does that mean? - Do we need to monitor an email address? - What promise would we make on response time? - Is this materially different from what we've already signed up for by publishing on github? 
- How do we handle ownership of the crates.io credentials? - Which license do we use? - Some of our code is already a derived work - do we permissively license it all, or restrict some of it? - Wire protocol maintenance - This is a problem we already have, we just didn't realize it, as it is already possible to construct our aggregates and INSERT them, and they also appear in pg dumps; at the moment, you can restore those dumps, though we haven't made any promise about it. On our stabilized aggregates, users may assume that is stabilized, too. - Is there a practical concern here? Or do we just say "not supported"? - Is it possible to crash the extension with invalid inputs? - If we commit to a public wire protocol, shouldn't we avoid the Rust-specific ron and go for something more common? ## Proposal As a first step, build a crate which externalizes tdigest aggregate creation. ```rust let mut digester = tdigest::Builder::with_size(N); loop { digester.push(value); } send_to_postgres(format!("INSERT INTO digests VALUES ({})", digester.build().format_for_postgres())); ``` In order to provide that API, we must first reorganize the tdigest implementation so that all business logic is in the tdigest crate. Some is currently in the pgrx extension crate. For each aggregate, the transient state is actually a Builder pattern hidden behind pgrx machinery. On this branch, I've moved TDigestTransState into tdigest::Builder. Currently, we use default ron behavior to serialize the raw implementation details of the pg_type. Users can insert inconsistent data now, and it doesn't look like we validate that at insertion time. We should reconsider this for all pg_types regardless of the overall client project. Is it possible NOT to offer serialized insertion at all? If so, turning that off would be a good first step. Then we can enable it just where we want to. We should put more thought into the serialization format we intentionally support. 
Currently it contains redundancy which we can eliminate by implementing serialization carefully rather than relying on defaults. ## Proof of concept This is a simple demonstration of inserting serialized tdigest into a table, showing that it works the same way as an aggregate built by the extension. ```SQL ,non-transactional CREATE TABLE test (data DOUBLE PRECISION); INSERT INTO test SELECT generate_series(0.01, 1, 0.01); CREATE VIEW digest AS SELECT tdigest(100, data) FROM test; CREATE TABLE digest2 (tdigest tdigest); INSERT INTO digest2 VALUES ('(version:1,max_buckets:100,count:100,sum:50.50000000000001,min:0.01,max:1,centroids:[(mean:0.01,weight:1),(mean:0.02,weight:1),(mean:0.03,weight:1),(mean:0.04,weight:1),(mean:0.05,weight:1),(mean:0.06,weight:1),(mean:0.07,weight:1),(mean:0.08,weight:1),(mean:0.09,weight:1),(mean:0.1,weight:1),(mean:0.11,weight:1),(mean:0.12,weight:1),(mean:0.13,weight:1),(mean:0.14,weight:1),(mean:0.15,weight:1),(mean:0.16,weight:1),(mean:0.17,weight:1),(mean:0.18,weight:1),(mean:0.19,weight:1),(mean:0.2,weight:1),(mean:0.21,weight:1),(mean:0.22,weight:1),(mean:0.23,weight:1),(mean:0.24,weight:1),(mean:0.25,weight:1),(mean:0.26,weight:1),(mean:0.27,weight:1),(mean:0.28,weight:1),(mean:0.29,weight:1),(mean:0.3,weight:1),(mean:0.31,weight:1),(mean:0.32,weight:1),(mean:0.33,weight:1),(mean:0.34,weight:1),(mean:0.35,weight:1),(mean:0.36,weight:1),(mean:0.37,weight:1),(mean:0.38,weight:1),(mean:0.39,weight:1),(mean:0.4,weight:1),(mean:0.41,weight:1),(mean:0.42,weight:1),(mean:0.43,weight:1),(mean:0.44,weight:1),(mean:0.45,weight:1),(mean:0.46,weight:1),(mean:0.47,weight:1),(mean:0.48,weight:1),(mean:0.49,weight:1),(mean:0.5,weight:1),(mean:0.51,weight:1),(mean:0.525,weight:2),(mean:0.545,weight:2),(mean:0.565,weight:2),(mean:0.585,weight:2),(mean:0.605,weight:2),(mean:0.625,weight:2),(mean:0.64,weight:1),(mean:0.655,weight:2),(mean:0.675,weight:2),(mean:0.69,weight:1),(mean:0.705,weight:2),(mean:0.72,weight:1),(mean:0.735,weight:2),(mean:0.75,w
eight:1),(mean:0.76,weight:1),(mean:0.775,weight:2),(mean:0.79,weight:1),(mean:0.8,weight:1),(mean:0.815,weight:2),(mean:0.83,weight:1),(mean:0.84,weight:1),(mean:0.85,weight:1),(mean:0.86,weight:1),(mean:0.87,weight:1),(mean:0.88,weight:1),(mean:0.89,weight:1),(mean:0.9,weight:1),(mean:0.91,weight:1),(mean:0.92,weight:1),(mean:0.93,weight:1),(mean:0.94,weight:1),(mean:0.95,weight:1),(mean:0.96,weight:1),(mean:0.97,weight:1),(mean:0.98,weight:1),(mean:0.99,weight:1),(mean:1,weight:1)])'); ``` ```SQL SELECT min_val(tdigest), max_val(tdigest), num_vals(tdigest) FROM digest; ``` ```output min_val | max_val | num_vals ---------+---------+---------- 0.01 | 1 | 100 ``` Inserting serialized tdigest into table behaves the same: ```SQL SELECT min_val(tdigest), max_val(tdigest), num_vals(tdigest) FROM digest2; ``` ```output min_val | max_val | num_vals ---------+---------+---------- 0.01 | 1 | 100 ``` ================================================ FILE: docs/counter_agg.md ================================================ # Counter Aggregates > [Description](#counter-agg-description)
> [Example Usage](#counter-agg-examples)
> [API](#counter-agg-api)
> [Notes on Parallelism and Ordering](#counter-agg-ordering)
> [Extrapolation Methods and Considerations](#counter-agg-methods)
## Description Metrics generally come in a few different varieties, which many systems have come to call *gauges* and *counters*. A gauge is a typical metric that can vary up or down, something like temperature or percent utilization. A counter is meant to be monotonically increasing. So it keeps track of, say, the total number of visitors to a website. The main difference in processing counters and gauges is that a decrease in the value of a counter (compared to its previous value in the timevector) is interpreted as a *reset*. This means that the "true value" of the counter after a decrease is the previous value + the current value. A reset could occur due to a server restart or any number of other reasons. Because of the feature of the reset a counter is often analyzed by taking its change over a time period, accounting for resets. (Our `delta` function offers a way to do this). Accounting for resets is hard in pure SQL, so we've developed aggregate and accessor functions that do the proper calculations for counters. While the aggregate is not parallelizable, it is supported with [continuous aggregation](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates). Additionally, [see the notes on parallelism and ordering](#counter-agg-ordering) for a deeper dive into considerations for use with parallelism and some discussion of the internal data structures. --- ## Example Usage For these examples we'll assume a table `foo` defined as follows: ```SQL ,ignore CREATE TABLE foo ( measure_id BIGINT, ts TIMESTAMPTZ , val DOUBLE PRECISION, PRIMARY KEY (measure_id, ts) ); ``` We'll start by showing a typical usage of a counter aggregate as well as the `delta` accessor function which gives you the change in the counter's value over the time period in question, accounting for any resets. 
```SQL ,ignore SELECT measure_id, delta( counter_agg(ts, val) ) FROM foo GROUP BY measure_id; ``` We can also use the [`time_bucket` function](https://docs.timescale.com/latest/api#time_bucket) to produce a series of deltas over 15 minute increments. ```SQL ,ignore SELECT measure_id, time_bucket('15 min'::interval, ts) as bucket, delta( counter_agg(ts, val) ) FROM foo GROUP BY measure_id, time_bucket('15 min'::interval, ts); ``` This will allow us to search for 15 minute periods where the counter increased by a larger or smaller amount. If series are less regular and so the deltas are affected by the number of samples in the 15 minute period, you can use the `extrapolated_delta` function. For this we'll need to provide bounds so we know where to extrapolate to, for this we'll use the `time_bucket_range` function, which works just like `time_bucket` but produces the open ended range `[start, end)` of all the times in the bucket. We'll also use a CTE to do the [`counter_agg`](#counter-agg-point) just so it's a little easier to understand what's going on in each part: ```SQL ,ignore with t as ( SELECT measure_id, time_bucket('15 min'::interval, ts) as bucket, counter_agg(ts, val, bounds => time_bucket_range('15 min'::interval, ts)) FROM foo GROUP BY measure_id, time_bucket('15 min'::interval, ts)) SELECT time_bucket, extrapolated_delta(counter_agg, method => 'prometheus') FROM t ; ``` Note that we're also using the `'prometheus'` method for doing our extrapolation. Our current extrapolation function is built to mimic the Prometheus project's [`increase` function](https://prometheus.io/docs/prometheus/latest/querying/functions/#increase), which measures the change of a counter extrapolated to the edges of the queried region. Of course this might be more useful if we make a continuous aggregate out of it. 
We'll first have to make it a hypertable partitioned on the ts column: ```SQL ,ignore SELECT create_hypertable('foo', 'ts', chunk_time_interval=> '15 days'::interval, migrate_data => true); ``` Now we can make our continuous aggregate: ```SQL ,ignore CREATE MATERIALIZED VIEW foo_15 WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT measure_id, time_bucket('15 min'::interval, ts) as bucket, counter_agg(ts, val, bounds => time_bucket_range('15 min'::interval, ts)) FROM foo GROUP BY measure_id, time_bucket('15 min'::interval, ts); ``` Note that here, we just use the [`counter_agg`](#counter-agg-point) function. It's often better to do that and simply run the accessor functions on the result, it's much more flexible that way, as there are many accessor functions, and the data is there so you can run multiple of them over the same aggregate. ```SQL ,ignore SELECT measure_id, bucket, delta(counter_agg), rate(counter_agg), extrapolated_rate(counter_agg, method => 'prometheus'), slope(counter_agg) FROM foo_15 ``` Here we've used multiple other accessor functions, the `rate` function is a simple `Δval / Δtime` (both observed) calculation, whereas the `extrapolated_rate` with the `'prometheus'` method follows the [Prometheus `rate` function's](https://prometheus.io/docs/prometheus/latest/querying/functions/#rate) behavior of extrapolating to the edges of the boundary and using the bounds provided rather than the observed values. The `slope` function calculates the slope of the least-squares fit line of the values over time. The counter resets are accounted for and "true" values are fed into the linear regression algorithm before this slope is computed. 
We can also re-aggregate from the continuous aggregate into a larger bucket size quite simply: ```SQL ,ignore SELECT measure_id, time_bucket('1 day'::interval, bucket), delta( rollup(counter_agg) ) FROM foo_15 GROUP BY measure_id, time_bucket('1 day'::interval, bucket); ``` There are several other accessor functions which we haven't described in the examples here, but are listed in the API section under the [accessors](#counter-agg-api-accessors). --- # Command List ### [Aggregate Functions](#counter-agg-api-aggregates) > - [counter_agg() (point form)](#counter-agg-point) > - [rollup() (summary form)](#counter-agg-summary) ### [Accessor Functions (A-Z)](#counter-agg-api-accessors) > - [corr()](#counter-agg-corr) > - [counter_zero_time()](#counter-agg-counter-zero-time) > - [delta()](#counter-agg-delta) > - [extrapolated_delta()](#counter-agg-extrapolated-delta) > - [extrapolated_rate()](#counter-agg-extrapolated-rate) > - [idelta_left()](#counter-agg-idelta-left) > - [idelta_right()](#counter-agg-idelta-right) > - [intercept()](#counter-agg-intercept) > - [irate_left()](#counter-agg-irate-left) > - [irate_right()](#counter-agg-irate-right) > - [num_changes()](#counter-agg-num-changes) > - [num_elements()](#counter-agg-num-elements) > - [num_resets()](#counter-agg-num-resets) > - [rate()](#counter-agg-rate) > - [slope()](#counter-agg-slope) > - [time_delta()](#counter-agg-time-delta) ### [Utility Functions](#counter-agg-api-utilities) > - [with_bounds()](#counter-agg-with-bounds) --- # Aggregate Functions Aggregating a counter to produce a `CounterSummary` is the first step in performing any calculations on it. There are two basic forms, one which takes in timestamps and values (the point form) and one which can combine multiple `CounterSummaries` together to form a larger summary spanning a larger amount of time. (See [Notes on Parallelism and Ordering](#counter-agg-ordering) for more information on how that works). 
--- ## **counter_agg() (point form)** ```SQL ,ignore counter_agg( ts TIMESTAMPTZ, value DOUBLE PRECISION¹, bounds TSTZRANGE DEFAULT NULL ) RETURNS CounterSummary ``` An aggregate that produces a `CounterSummary` from timestamps and associated values. ##### ¹ Note that the `value` is currently only accepted as a `DOUBLE PRECISION` number as most people use that for counters, even though other numeric types (ie `BIGINT`) might sometimes be more intuitive. If you store a value as a different numeric type you can cast to `DOUBLE PRECISION` on input to the function. ### Required Arguments² |Name| Type |Description| |---|---|---| | `ts` | `TIMESTAMPTZ` | The time at each point | | `value` | `DOUBLE PRECISION` | The value at each point to use for the counter aggregate|
##### ² Note that `ts` and `value` can be `null`, however the aggregate is not evaluated on `null` values and will return `null`, but it will not error on `null` inputs. ### Optional Arguments |Name| Type |Description| |---|---|---| | `bounds` | `TSTZRANGE` | A range of `timestamptz` representing the largest and smallest possible times that could be input to this aggregate. Calling with `NULL` or leaving out the argument results in an unbounded `CounterSummary`. Bounds are required for extrapolation, but not for other [accessor functions](#counter-agg-api-accessors). |
### Returns |Column|Type|Description| |---|---|---| | `counter_agg` | `CounterSummary` | A CounterSummary object that can be passed to [accessor functions](#counter-agg-api-accessors) or other objects in the counter aggregate API |
### Sample Usage ```SQL ,ignore WITH t as ( SELECT time_bucket('1 day'::interval, ts) as dt, counter_agg(ts, val) AS cs -- get a CounterSummary FROM foo WHERE id = 'bar' GROUP BY time_bucket('1 day'::interval, ts) ) SELECT dt, irate_right(cs) -- extract instantaneous rate from the CounterSummary FROM t; ``` --- ## **rollup() (summary form)** ```SQL ,ignore rollup( cs CounterSummary ) RETURNS CounterSummary ``` An aggregate to compute a combined `CounterSummary` from a series of non-overlapping `CounterSummaries`. Non-disjoint `CounterSummaries` will cause errors. See [Notes on Parallelism and Ordering](#counter-agg-ordering) for more information. ### Required Arguments² |Name| Type |Description| |---|---|---| | `cs` | `CounterSummary` | The input CounterSummary from a previous [`counter_agg`](#counter-agg-point) (point form) call, often from a [continuous aggregate](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates)| ##### ² Note that `summary` can be `null`, however the aggregate is not evaluated on `null` values and will return `null`, but it will not error on `null` inputs. ### Returns |Column|Type|Description| |---|---|---| | `counter_agg` | `CounterSummary` | A CounterSummary object that can be passed to [accessor functions](#counter-agg-api-accessors) or other objects in the counter aggregate API|
### Sample Usage ```SQL ,ignore WITH t as ( SELECT date_trunc('day', ts) as dt, counter_agg(ts, val) AS counter_summary -- get a counter summary FROM foo WHERE id = 'bar' GROUP BY date_trunc('day', ts) ), q as ( SELECT rollup(counter_summary) AS full_cs -- do a second level of aggregation to get the full CounterSummary FROM t ) SELECT dt, delta(counter_summary), -- extract the delta from the CounterSummary delta(counter_summary) / (SELECT delta(full_cs) FROM q LIMIT 1) as normalized -- get the fraction of the delta that happened each day compared to the full change of the counter FROM t; ``` # Accessor Functions ## Accessor Function List (by family) ### [Change over time (delta) functions](#counter-agg-delta-fam) > - [delta()](#counter-agg-delta) > - [extrapolated_delta()](#counter-agg-extrapolated-delta) > - [idelta_left()](#counter-agg-idelta-left) > - [idelta_right()](#counter-agg-idelta-right) > - [time_delta()](#counter-agg-time-delta) ### Rate of change over time (rate) functions > - [rate()](#counter-agg-rate) > - [extrapolated_rate()](#counter-agg-extrapolated-rate) > - [irate_left()](#counter-agg-irate-left) > - [irate_right()](#counter-agg-irate-right) ### Counting functions > - [num_changes()](#counter-agg-num-changes) > - [num_elements()](#counter-agg-num-elements) > - [num_resets()](#counter-agg-num-resets) ### Statistical regression / least squares fit functions > - [slope()](#counter-agg-slope) > - [intercept()](#counter-agg-intercept) > - [counter_zero_time()](#counter-agg-counter-zero-time) > - [corr()](#counter-agg-corr) --- ## **Change over time (delta) functions** Functions in the delta family are dedicated to finding the change in a value (or observed time, in the case of `time_delta`) of a counter during a time period, taking into account any counter resets that may have occurred. --- ## **delta()** ```SQL ,ignore delta( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The change in the counter over the time period. 
This is the raw or simple delta computed by accounting for resets then subtracting the last seen value from the first. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `delta` | `DOUBLE PRECISION` | The delta computed from the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, delta(summary) FROM ( SELECT id, counter_agg(ts, val) AS summary FROM foo GROUP BY id ) t ``` --- ## **extrapolated_delta()** ```SQL ,ignore extrapolated_delta( summary CounterSummary, method TEXT¹ ) RETURNS DOUBLE PRECISION ``` The change in the counter during the time period specified by the `bounds` in the `CounterSummary`. To calculate the extrapolated delta, any counter resets are accounted for and the observed values are extrapolated to the bounds using the `method` specified (see [Extrapolation Methods and Considerations](#counter-agg-methods)) then the values are subtracted to compute the delta. The `bounds` must be specified for the `extrapolated_delta` function to work, the bounds can be provided in the [`counter_agg`](#counter-agg-point) call, or by using the [`with_bounds`](#counter-agg-with-bounds) utility function to set the bounds ##### ¹ Currently, the only allowed value of `method` is `'prometheus'`, as we have only implemented extrapolation following the Prometheus extrapolation protocol, see [Extrapolation Methods and Considerations](#counter-agg-methods) for more information. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| | `method` | `TEXT` | The extrapolation method to use, the only option currently is 'prometheus', not case sensitive.| ### Returns |Column|Type|Description| |---|---|---| | `extrapolated_delta` | `DOUBLE PRECISION` | The delta computed from the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, extrapolated_delta( with_bounds( summary, time_bucket_range('15 min'::interval, bucket) ) ) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **idelta_left()** ```SQL ,ignore idelta_left( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The instantaneous change in the counter at the left (earlier) side of the time range. Essentially, the first value subtracted from the second value seen in the time range (handling resets appropriately). This can be especially useful for fast moving counters. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `idelta_left` | `DOUBLE PRECISION` | The instantaneous delta computed from left (earlier) side of the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, idelta_left(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **idelta_right()** ```SQL ,ignore idelta_right( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The instantaneous change in the counter at the right (later) side of the time range. Essentially, the penultimate value subtracted from the last value seen in the time range (handling resets appropriately). This can be especially useful for fast moving counters. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `idelta_right` | `DOUBLE PRECISION` | The instantaneous delta computed from right (later) side of the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, idelta_right(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **time_delta()** ```SQL ,ignore time_delta( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The observed change in time (`last time - first time`) over the period aggregated. Measured in seconds. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `time_delta` | `DOUBLE PRECISION` | The total duration in seconds between the first and last observed times in the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, time_delta(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **Rate of change over time (rate) functions** The rate family of functions find the reset-adjusted rate of change (`delta(value)/delta(time)`) of a counter on a per-second basis. --- ## **rate()** ```SQL ,ignore rate( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The rate of change of the counter over the observed time period. This is the raw or simple rate, equivalent to `delta(summary) / time_delta(summary)`. After accounting for resets, we subtract the last value from the first and divide by the duration between the last observed time and the first observed time. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `rate` | `DOUBLE PRECISION` | The per second observed rate computed from the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, rate(summary) FROM ( SELECT id, counter_agg(ts, val) AS summary FROM foo GROUP BY id ) t ``` --- ## **extrapolated_rate()** ```SQL ,ignore extrapolated_rate( summary CounterSummary, method TEXT¹ ) RETURNS DOUBLE PRECISION ``` The rate of change in the counter computed over the time period specified by the `bounds` in the `CounterSummary`, extrapolating to the edges. Essentially, it is an [`extrapolated_delta`](#counter-agg-extrapolated-delta) divided by the duration in seconds. The `bounds` must be specified for the `extrapolated_rate` function to work, the bounds can be provided in the [`counter_agg`](#counter-agg-point) call, or by using the [`with_bounds`](#counter-agg-with-bounds) utility function to set the bounds ##### ¹ Currently, the only allowed value of `method` is `'prometheus'`, as we have only implemented extrapolation following the Prometheus extrapolation protocol, see [Extrapolation Methods and Considerations](#counter-agg-methods) for more information. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| | `method` | `TEXT` | The extrapolation method to use, the only option currently is 'prometheus', not case sensitive.| ### Returns |Column|Type|Description| |---|---|---| | `extrapolated_rate` | `DOUBLE PRECISION` | The per-second rate of change of the counter computed from the `CounterSummary` extrapolated to the `bounds` specified there. |
### Sample Usage ```SQL ,ignore SELECT id, bucket, extrapolated_rate( with_bounds( summary, time_bucket_range('15 min'::interval, bucket) ) ) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **irate_left()** ```SQL ,ignore irate_left( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The instantaneous rate of change of the counter at the left (earlier) side of the time range. Essentially, the [`idelta_left`](#counter-agg-idelta-left) divided by the duration between the first and second observed points in the `CounterSummary`. This can be especially useful for fast moving counters. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `irate_left` | `DOUBLE PRECISION` | The instantaneous rate computed from left (earlier) side of the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, irate_left(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **irate_right()** ```SQL ,ignore irate_right( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The instantaneous rate of change of the counter at the right (later) side of the time range. Essentially, the [`idelta_right`](#counter-agg-idelta-right) divided by the duration between the first and second observed points in the `CounterSummary`. This can be especially useful for fast moving counters. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `irate_right` | `DOUBLE PRECISION` | The instantaneous rate computed from right (later) side of the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, irate_right(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- # **Counting functions** The counting functions comprise several accessor functions that calculate the number of times a certain thing occurred while calculating the [`counter_agg`](#counter-agg-point). --- ## **num_changes()** ```SQL ,ignore num_changes( summary CounterSummary ) RETURNS BIGINT ``` The number of times the value changed within the period over which the `CounterSummary` is calculated. This is determined by evaluating consecutive points, any change counts, including counter resets where the counter is reset to zero, while this would result in the same _adjusted_ counter value for consecutive points, we still treat it as a change. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `num_changes` | `BIGINT` | The number of times the value changed|
### Sample Usage ```SQL ,ignore SELECT id, bucket, num_changes(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **num_elements()** ```SQL ,ignore num_elements( summary CounterSummary ) RETURNS BIGINT ``` The total number of points we saw in calculating the `CounterSummary`. Only points with distinct times are counted, as duplicate times are thrown out in general in these calculations. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input `CounterSummary` from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `num_elements` | `BIGINT` | The number of points seen during the [`counter_agg`](#counter-agg-point) call|
### Sample Usage ```SQL ,ignore SELECT id, bucket, num_elements(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **num_resets()** ```SQL ,ignore num_resets( summary CounterSummary ) RETURNS BIGINT ``` The total number of times we detected a counter reset while calculating the `CounterSummary`. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input `CounterSummary` from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `num_resets` | `BIGINT` | The number of resets detected during the [`counter_agg`](#counter-agg-point) call|
### Sample Usage ```SQL ,ignore SELECT id, bucket, num_resets(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- # **Statistical regression functions** The statistical regression family of functions contains several functions derived from a least squares fit of the adjusted value of the counter. All counter values have resets accounted for before being fed into the linear regression algorithm (and any combined `CounterSummaries` have the proper adjustments performed for resets to enable the proper regression analysis to be performed). ###### NB: Note that the timestamps input are converted from their internal representation (microseconds since the Postgres Epoch, which is 2000-01-01 00:00:00+00, for some reason) to double precision numbers representing seconds from the Postgres Epoch, with decimal places as fractional seconds, before running the linear regression. Because the internal representation of the timestamp is actually a 64-bit integer representing microseconds from the Postgres Epoch, it provides more precision for very large timestamps (the representable range goes out to 294276-12-31). If you want to have accurate, microsecond level precision on your regression analysis dealing with dates at the edge of this range (first off, who are you and *what the heck are you working on???*) we recommend subtracting a large static date from your timestamps and then adding it back after the analysis has concluded. Very small timestamps should be fine as the range does not extend beyond 4714-11-01 BCE, beyond which Julian dates [are not considered reliable by Postgres](https://github.com/postgres/postgres/blob/c30f54ad732ca5c8762bb68bbe0f51de9137dd72/src/include/datatype/timestamp.h#L131). 
This means that the negative integers are not fully utilized in the timestamp representation and you don't have to worry about imprecision in your computed slopes if you have traveled back in time and are timing chariot races to the microsecond. However, if you travel much further back in time, you're still SOL, as we can't represent the timestamp in the Julian calendar. --- ## **slope()** ```SQL ,ignore slope( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The slope of the least squares fit line computed from the adjusted counter values and times input in the `CounterSummary`. Because the times are input as seconds, the slope will provide a per-second rate of change estimate based on the least squares fit, which will often be similar to the result of the `rate` calculation, but may more accurately reflect the "usual" behavior if there are infrequent, large changes in a counter. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `slope` | `DOUBLE PRECISION` | The per second rate of change computed by taking the slope of the least squares fit of the points input in the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, slope(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **intercept()** ```SQL ,ignore intercept( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The intercept of the least squares fit line computed from the adjusted counter values and times input in the `CounterSummary`. This will correspond to the projected value at the Postgres Epoch (2000-01-01 00:00:00+00) - which is not all that useful for much of anything except potentially drawing the best fit line on a graph, using the slope and the intercept. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `intercept` | `DOUBLE PRECISION` | The intercept of the least squares fit line computed from the points input to the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, intercept(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **counter_zero_time()** ```SQL ,ignore counter_zero_time( summary CounterSummary ) RETURNS TIMESTAMPTZ ``` The time at which the counter value is predicted to have been zero based on the least squares fit line computed from the points in the `CounterSummary`. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `counter_zero_time` | `TIMESTAMPTZ` | The time at which the counter value is predicted to have been zero based on the least squares fit of the points input to the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, counter_zero_time(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- ## **corr()** ```SQL ,ignore corr( summary CounterSummary ) RETURNS DOUBLE PRECISION ``` The correlation coefficient of the least squares fit line of the adjusted counter value. Given that the slope of the fit line for any counter value must be non-negative, this will also always be non-negative and in the range [0.0, 1.0]. It measures how well the least squares fit fits the available data, where a value of 1.0 represents the strongest correlation between time and the counter increasing. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input CounterSummary from a [`counter_agg`](#counter-agg-point) call.| ### Returns |Column|Type|Description| |---|---|---| | `corr` | `DOUBLE PRECISION` | The correlation coefficient computed from the least squares fit of the adjusted counter values input to the `CounterSummary`|
### Sample Usage ```SQL ,ignore SELECT id, bucket, corr(summary) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` # **Utility Functions** --- ## **with_bounds()** ```SQL ,ignore with_bounds( summary CounterSummary, bounds TSTZRANGE ) RETURNS CounterSummary ``` A utility function to add bounds to an already-computed `CounterSummary`. The bounds represent the outer limits of the timestamps allowed for this `CounterSummary` as well as the edges of the range to extrapolate to in functions that do that. ### Required Arguments |Name| Type |Description| |---|---|---| | `summary` | `CounterSummary` | The input `CounterSummary`. | | `bounds` | `TSTZRANGE` | A range of `timestamptz` representing the largest and smallest allowed times in this `CounterSummary` | ### Returns |Column|Type|Description| |---|---|---| | `counter_agg` | `CounterSummary` | A CounterSummary object that can be passed to [accessor functions](#counter-agg-api-accessors) or other objects in the counter aggregate API|
### Sample Usage ```SQL ,ignore SELECT id, bucket, extrapolated_rate( with_bounds( summary, time_bucket_range('15 min'::interval, bucket) ) ) FROM ( SELECT id, time_bucket('15 min'::interval, ts) AS bucket, counter_agg(ts, val) AS summary FROM foo GROUP BY id, time_bucket('15 min'::interval, ts) ) t ``` --- # Notes on Parallelism and Ordering The counter reset calculations we perform require a strict ordering of inputs and therefore the calculations are not parallelizable in the strict Postgres sense. This is because when Postgres does parallelism it hands out rows randomly, basically as it sees them to workers. However, if your parallelism can guarantee disjoint (in time) sets of rows, the algorithm can be parallelized, just so long as within some time range, all rows go to the same worker. This is the case for both [continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates) and for [distributed hypertables](https://docs.timescale.com/latest/using-timescaledb/distributed-hypertables) (as long as the partitioning keys are in the group by, though the aggregate itself doesn't horribly make sense otherwise). We throw an error if there is an attempt to combine overlapping `CounterSummaries`, for instance, in our example above, if you were to try to combine summaries across `measure_id`'s it would error (assuming that they had overlapping times). This is because the counter values resetting really only makes sense within a given time series determined by a single `measure_id`. However, once an accessor function is applied, such as `delta`, a sum of deltas may be computed. Similarly, an average or histogram of rates across multiple time series might be a useful calculation to perform. The thing to note is that the counter aggregate and the reset logic should be performed first, then further calculations may be performed on top of that. 
As an example, let's consider that we might want to find which of my counters had the most extreme rates of change in each 15 minute period. For this, we'll want to normalize the rate of change of each measure by dividing it by the average rate of change over all the counters in that 15 minute period. We'll use the normal `avg` function to do this, but we'll use it as a window function like so: ```SQL ,ignore WITH t as (SELECT measure_id, time_bucket('15 min'::interval, ts) AS bucket, rate( counter_agg(ts, val) ) as rate FROM foo GROUP BY measure_id), SELECT measure_id, bucket, rate, rate / avg(rate_per_measure) OVER (PARTITION BY bucket) AS normalized_rate -- call normal avg function as a window function to get a 15 min avg to normalize our per-measure rates FROM t; ``` Still, note that the counter resets are accounted for before applying the `avg` function in order to get our normalized rate. Internally, the `CounterSummary` stores: - the first, second, penultimate, and last points seen - the sum of all the values at reset points, as well as the number of changes, and number of resets seen. - A set of 6 values used to compute all the statistical regression parameters using the Youngs-Cramer algorithm. - Optionally, the bounds as an open-ended range, over which extrapolation should occur and which represents the outer possible limit of times represented in this `CounterSummary` In general, the functions support [partial aggregation](https://www.postgresql.org/docs/current/xaggr.html#XAGGR-PARTIAL-AGGREGATES) and partitionwise aggregation in the multinode context, but are not parallelizable (in the Postgres sense, which requires them to accept potentially overlapping input). Because they require ordered sets, the aggregates build up a buffer of input data, sort it and then perform the proper aggregation steps. In cases where memory is proving to be too small to build up a buffer of points causing OOMs or other issues, a multi-level aggregate can be useful. 
So where I might run into OOM issues if I computed the values over all time like so: ```SQL ,ignore SELECT measure_id, rate( counter_agg(ts, val) ) as rate FROM foo GROUP BY measure_id; ``` If I were to instead, compute the [`counter_agg`](#counter-agg-point) over, say daily buckets and then combine the aggregates, I might be able to avoid OOM issues, as each day will be computed separately first and then combined, like so: ```SQL ,ignore WITH t as (SELECT measure_id, time_bucket('1 day'::interval, ts) AS bucket, counter_agg(ts, val) FROM foo GROUP BY measure_id), SELECT measure_id, \ rate( rollup(counter_agg) --combine the daily `CounterSummaries` to make a full one over all time, accounting for all the resets, then apply the rate function ) FROM t; ``` Moving aggregate mode is not supported by [`counter_agg`](#counter-agg-point) and its use as a window function may be quite inefficient. --- # Extrapolation Methods Details #TODO ================================================ FILE: docs/examples/tdigest.c ================================================ // cc -o tdigest tdigest.c $CARGO_TARGET_DIR/$PROFILE/libtimescaledb_toolkit_tdigest.a -lm -lpthread -ldl // Sample program which prints the expected output of the test_tdigest_io test. //////////////////////////////////////////////////////////////////////////////// // TODO Generate a header from tdigest-lib crate. #include struct TDigestBuilder; struct TDigest; // Return pointer to new TDigestBuilder. // MUST NOT be passed to free(3). Instead, pass to timescaledb_toolkit_tdigest_builder_free to // discard or to timescaledb_toolkit_tdigest_build to convert to TDigest. // Never returns NULL. struct TDigestBuilder * timescaledb_toolkit_tdigest_builder_with_size(size_t size); void timescaledb_toolkit_tdigest_push(struct TDigestBuilder *builder, double value); void timescaledb_toolkit_tdigest_merge(struct TDigestBuilder *builder, struct TDigestBuilder *other); // Free a TDigestBuilder that has not been built. 
// MUST NOT be passed NULL. void timescaledb_toolkit_tdigest_builder_free(struct TDigestBuilder *builder); // Return pointer to new TDigest built from builder. // builder MUST NOT be passed to timescaledb_toolkit_tdigest_builder_free . struct TDigest * timescaledb_toolkit_tdigest_build(struct TDigestBuilder *builder); // Free a TDigest. void timescaledb_toolkit_tdigest_free(struct TDigest *td); // Return pointer to null-terminated buffer containing ASCII serialization of TDigest suitable for // use with postgresql INSERT. // Free the buffer with free(3). char * timescaledb_toolkit_tdigest_format_for_postgres(struct TDigest *td); //////////////////////////////////////////////////////////////////////////////// #include <stdio.h> #include <stdlib.h> int main() { struct TDigestBuilder *builder = timescaledb_toolkit_tdigest_builder_with_size(100); double value; for (value = 1.0; value <= 100.0; value++) { timescaledb_toolkit_tdigest_push(builder, value); } struct TDigest *td = timescaledb_toolkit_tdigest_build(builder); char *formatted = timescaledb_toolkit_tdigest_format_for_postgres(td); printf("%s\n", formatted); free(formatted); timescaledb_toolkit_tdigest_free(td); return 0; } ================================================ FILE: docs/examples/tdigest.py ================================================ import ctypes import os _cdll = ctypes.CDLL(os.path.join( os.getenv('CARGO_TARGET_DIR', 'target'), os.getenv('PROFILE', 'debug'), 'libtimescaledb_toolkit_tdigest.so')) _cdll.timescaledb_toolkit_tdigest_builder_with_size.restype = ctypes.c_void_p _cdll.timescaledb_toolkit_tdigest_build.restype = ctypes.c_void_p _cdll.timescaledb_toolkit_tdigest_format_for_postgres.restype = ctypes.POINTER(ctypes.c_char) _cdll.timescaledb_toolkit_tdigest_push.restype = None _cdll.timescaledb_toolkit_tdigest_merge.restype = None _cdll.timescaledb_toolkit_tdigest_builder_free.restype = None _cdll.timescaledb_toolkit_tdigest_free.restype = None # Wrapper classes use `real_pointer` to keep hold of the real 
pointer for as # long as it needs to be released. # We copy it to self.pointer to enforce use of `with` (as much as anything can be enforced in Python). # Attempting to forego `with` results in `AttributeError`. class TDigest: class Builder: def __init__(self, pointer): self.real_pointer = pointer def __enter__(self): self.pointer = self.real_pointer return self def __exit__(self, exc_type, exc_val, exc_tb): self.__del__() self.real_pointer = None if 'pointer' in self.__dict__: del self.__dict__['pointer'] def __del__(self): if self.real_pointer is not None: _cdll.timescaledb_toolkit_tdigest_builder_free(self.real_pointer) def with_size(size): return TDigest.Builder(ctypes.c_void_p(_cdll.timescaledb_toolkit_tdigest_builder_with_size(ctypes.c_size_t(size)))) def push(self, value): _cdll.timescaledb_toolkit_tdigest_push(self.pointer, ctypes.c_double(value)) def build(self): td = TDigest(ctypes.c_void_p(_cdll.timescaledb_toolkit_tdigest_build(self.pointer))) self.real_pointer = None del self.__dict__['pointer'] return td def __init__(self, pointer): self.real_pointer = pointer def __enter__(self): self.pointer = self.real_pointer return self def __exit__(self, exc_type, exc_val, exc_tb): self.__del__() self.real_pointer = None if 'pointer' in self.__dict__: del self.__dict__['pointer'] def __del__(self): if self.real_pointer is not None: _cdll.timescaledb_toolkit_tdigest_free(self.real_pointer) def format_for_postgres(self): buf = _cdll.timescaledb_toolkit_tdigest_format_for_postgres(self.pointer) s = ctypes.cast(buf, ctypes.c_char_p).value.decode('ascii') # TODO free(3) left as an exercise to the reader. This is for GNU libc on Linux/amd64: ctypes.CDLL('libc.so.6').free(buf) return s # Sample program which prints the expected output of the test_tdigest_io test. 
def test(): with TDigest.Builder.with_size(100) as builder: for value in range(1, 101): builder.push(value) with builder.build() as td: print(td.format_for_postgres()) if __name__ == '__main__': test() ================================================ FILE: docs/gauge_agg.md ================================================ # Gauge Aggregates [experimental](/docs/README.md#tag-notes) A gauge is a metric similar to a counter, with the primary difference being that it measures a value that varies up and down over time, rather than an ever-increasing COUNT of the number of times something happened. Examples include resource utilization metrics, precipitation levels, or temperatures. `gauge_agg` currently shares implementation with `counter_agg` but without the resetting logic. This means it enforces ordering even though that is not necessarily required for all gauge aggregates. We may offer an additional unordered gauge aggregate in the future. # Test table Examples below are tested against the following table: ```SQL ,non-transactional SET TIME ZONE 'UTC'; CREATE TABLE gauge_test ( measure_id BIGINT, ts TIMESTAMPTZ , val DOUBLE PRECISION, PRIMARY KEY (measure_id, ts) ); INSERT INTO gauge_test SELECT 1, '2020-01-03 UTC'::timestamptz + make_interval(days=>v), v + 1000 FROM generate_series(1,10) v; INSERT INTO gauge_test SELECT 2, '2020-01-03 UTC'::timestamptz + make_interval(days=>v), v + 2000 FROM generate_series(1,10) v; INSERT INTO gauge_test SELECT 3, '2020-01-03 UTC'::timestamptz + make_interval(days=>v), v + 3000 FROM generate_series(1,10) v; ``` ## Functions ### delta ```SQL,ignore SELECT toolkit_experimental.delta(toolkit_experimental.gauge_agg(ts, val)) FROM gauge_test; ``` ```ignore delta ------- -1991 ``` ### idelta_left ```SQL,ignore SELECT toolkit_experimental.idelta_left(toolkit_experimental.gauge_agg(ts, val)) FROM gauge_test; ``` ```ignore idelta_left ------------- 1002 ``` ### idelta_right ```SQL,ignore SELECT 
toolkit_experimental.idelta_right(toolkit_experimental.gauge_agg(ts, val)) FROM gauge_test; ``` ```ignore idelta_right -------------- 1010 ``` ### rollup ```SQL WITH t as (SELECT date_trunc('minute', ts), toolkit_experimental.gauge_agg(ts, val) as agg FROM gauge_test group by 1) SELECT toolkit_experimental.delta(toolkit_experimental.rollup(agg)) FROM t; ``` ```output rollup delta -------------- 9 ``` ================================================ FILE: docs/hyperloglog.md ================================================ # Hyperloglog > [Description](#hyperloglog-description)
> [Details](#hyperloglog-details)
> [API](#hyperloglog-api) ## Description TimescaleDB Toolkit provides an implementation of the [Hyperloglog estimator](https://en.wikipedia.org/wiki/HyperLogLog) for `COUNT DISTINCT` approximations of any type that has a hash function. ## Details Timescale's HyperLogLog is implemented as an aggregate function in PostgreSQL. They do not support moving-aggregate mode, and are not ordered-set aggregates. It is restricted to values that have an extended hash function. They are partializable and are good candidates for [continuous aggregation](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates). ## Command List (A-Z) > - [hyperloglog](#hyperloglog) > - [distinct_count](#distinct_count) --- ## **hyperloglog** ```SQL,ignore hyperloglog( size INTEGER, value AnyElement¹ ) RETURNS Hyperloglog ``` ¹The type must have an extended (64bit) hash function. This will construct and return a Hyperloglog with at least the specified number of buckets over the given values. ### Required Arguments |Name| Type |Description| |---|---|---| | `size` | `INTEGER` | Number of buckets in the digest. Will be rounded up to the next power of 2, must be between 16 and 2^18. Increasing this will usually provide more accuracy at the expense of more storage. | | `value` | `AnyElement` | Column to count the distinct elements of. |
### Returns |Column|Type|Description| |---|---|---| | `hyperloglog` | `Hyperloglog` | A hyperloglog object which may be passed to other hyperloglog APIs. |
### Sample Usages For this examples assume we have a table 'samples' with a column 'weights' holding `DOUBLE PRECISION` values. The following will simply return a digest over that column ```SQL ,ignore SELECT hyperloglog(64, weights) FROM samples; ``` It may be more useful to build a view from the aggregate that we can later pass to other tdigest functions. ```SQL ,ignore CREATE VIEW digest AS SELECT hyperloglog(64, data) FROM samples; ``` --- ## **rollup** ```SQL ,ignore rollup( log hyperloglog ) RETURNS Hyperloglog ``` Returns a Hyperloglog by aggregating over the union of the input elements. ### Required Arguments |Name| Type |Description| |---|---|---| | `log` | `Hyperloglog` | Column of Hyperloglogs to be unioned. |
### Returns |Column|Type|Description| |---|---|---| | `hyperloglog` | `Hyperloglog` | A hyperloglog containing the count of the union of the input Hyperloglogs. |
### Sample Usages ```SQL SELECT distinct_count(rollup(logs)) FROM ( (SELECT hyperloglog(32, v::text) logs FROM generate_series(1, 100) v) UNION ALL (SELECT hyperloglog(32, v::text) FROM generate_series(50, 150) v) ) hll; ``` ```output count ------- 153 ``` --- ## **distinct_count** ```SQL ,ignore distinct_count(hyperloglog Hyperloglog) RETURNS BIGINT ``` Get the number of distinct values from a hyperloglog. ### Required Arguments |Name|Type|Description| |---|---|---| | `hyperloglog` | `Hyperloglog` | The hyperloglog to extract the count from. |
### Returns |Column|Type|Description| |---|---|---| | `distinct_count` | `BIGINT` | The number of distinct elements counted by the hyperloglog. |
### Sample Usages ```SQL SELECT distinct_count(hyperloglog(64, data)) FROM generate_series(1, 100) data ``` ```output distinct_count ---------------- 104 ``` ## **stderror** ```SQL ,ignore stderror(hyperloglog Hyperloglog) RETURNS DOUBLE PRECISION ``` Returns an estimate of the relative stderror of the hyperloglog based on the hyperloglog error formula. Approximate result are: ``` precision ┃ registers ┃ error ┃ bytes ━━━━━━━━━━━╋━━━━━━━━━━━╋━━━━━━━━╋━━━━━━━━ 4 ┃ 16 ┃ 0.2600 ┃ 12 5 ┃ 32 ┃ 0.1838 ┃ 24 6 ┃ 64 ┃ 0.1300 ┃ 48 7 ┃ 128 ┃ 0.0919 ┃ 96 8 ┃ 256 ┃ 0.0650 ┃ 192 9 ┃ 512 ┃ 0.0460 ┃ 384 10 ┃ 1024 ┃ 0.0325 ┃ 768 11 ┃ 2048 ┃ 0.0230 ┃ 1536 12 ┃ 4096 ┃ 0.0163 ┃ 3072 13 ┃ 8192 ┃ 0.0115 ┃ 6144 14 ┃ 16384 ┃ 0.0081 ┃ 12288 15 ┃ 32768 ┃ 0.0057 ┃ 24576 16 ┃ 65536 ┃ 0.0041 ┃ 49152 17 ┃ 131072 ┃ 0.0029 ┃ 98304 18 ┃ 262144 ┃ 0.0020 ┃ 196608 ``` ### Required Arguments |Name|Type|Description| |---|---|---| | `hyperloglog` | `Hyperloglog` | The hyperloglog to extract the count from. |
### Returns |Column|Type|Description| |---|---|---| | `stderror` | `DOUBLE PRECISION` | An estimate of the relative standard error of the hyperloglog. |
### Sample Usages ```SQL SELECT stderror(hyperloglog(64, data)) FROM generate_series(1, 100) data ``` ```output stderror ---------- 0.13 ``` ================================================ FILE: docs/lttb.md ================================================ # Largest Triangle Three Buckets > [Description](#description)
> [Example](#example)
> [API](#api) ## Description [Largest Triangle Three Buckets](https://github.com/sveinn-steinarsson/flot-downsample) is a downsampling method that tries to retain visual similarity between the downsampled data and the original dataset. TimescaleDB Toolkit provides an implementation of this which takes `(timestamp, value)` pairs, sorts them if needed, and downsamples them. ## Usage Example In this example we're going to examine downsampling a 101 point cosine wave generated like so ```SQL ,non-transactional SET TIME ZONE 'UTC'; CREATE TABLE sample_data(time TIMESTAMPTZ, val DOUBLE PRECISION); INSERT INTO sample_data SELECT '2020-01-01 UTC'::TIMESTAMPTZ + make_interval(days=>(foo*10)::int) as time, 10 + 5 * cos(foo) as val FROM generate_series(1,11,0.1) foo ``` ```output INSERT 0 101 ``` when graphed, this waves appears like so ![Raw data](images/lttb_raw.png) we can downsample it to various degrees using `lttb`, for instance, downsampling to 34 points ```SQL SELECT time, value::numeric(10,2) FROM unnest(( SELECT lttb(time, val, 34) FROM sample_data)) ``` ``` output omitted ``` looks like so ![Raw data](images/lttb_34.png) as you further downsample, you retain fewer and fewer datapoints, and the resulting data looks less and less like the original ```SQL SELECT time, value::numeric(10,2) FROM unnest(( SELECT lttb(time, val, 17) FROM sample_data)) ``` ```output time | value ------------------------+------- 2020-01-11 00:00:00+00 | 12.70 2020-01-13 00:00:00+00 | 11.81 2020-01-22 00:00:00+00 | 7.48 2020-01-28 00:00:00+00 | 5.48 2020-02-03 00:00:00+00 | 5.06 2020-02-09 00:00:00+00 | 6.37 2020-02-14 00:00:00+00 | 8.46 2020-02-24 00:00:00+00 | 13.17 2020-03-01 00:00:00+00 | 14.80 2020-03-07 00:00:00+00 | 14.75 2020-03-13 00:00:00+00 | 13.04 2020-03-23 00:00:00+00 | 8.30 2020-03-29 00:00:00+00 | 5.94 2020-04-04 00:00:00+00 | 5.00 2020-04-10 00:00:00+00 | 5.80 2020-04-14 00:00:00+00 | 7.20 2020-04-20 00:00:00+00 | 10.02 ``` ![Raw data](images/lttb_17.png) ```SQL SELECT time, 
value::numeric(10,2) FROM unnest(( SELECT lttb(time, val, 8) FROM sample_data)) ``` ```output time | value ------------------------+------- 2020-01-11 00:00:00+00 | 12.70 2020-01-27 00:00:00+00 | 5.72 2020-02-06 00:00:00+00 | 5.52 2020-02-27 00:00:00+00 | 14.17 2020-03-09 00:00:00+00 | 14.35 2020-03-30 00:00:00+00 | 5.67 2020-04-09 00:00:00+00 | 5.55 2020-04-20 00:00:00+00 | 10.02 ``` ![Raw data](images/lttb_8.png) ## Command List (A-Z) > - [lttb](#lttb) --- ## **lttb** ```SQL,ignore lttb( time TIMESTAMPTZ, value DOUBLE PRECISION, resolution INTEGER ) RETURNS SortedTimevector ``` This will construct and return a sorted timevector with at most `resolution` points. `unnest(...)` can be used to extract the `(time, value)` pairs from this series ### Required Arguments |Name| Type |Description| |---|---|---| | `time` | `TIMESTAMPTZ` | Time (x) value for the data point. | | `value` | `DOUBLE PRECISION` | Data (y) value for the data point. | | `resolution` | `INTEGER` | Number of points the output should have. |
### Sample Usage ```SQL SELECT time, value FROM unnest(( SELECT lttb(time, val, 4) FROM sample_data)) ``` ```output time | value ------------------------+-------------------- 2020-01-11 00:00:00+00 | 12.7015115293407 2020-02-01 00:00:00+00 | 5.004324248633603 2020-03-03 00:00:00+00 | 14.982710485116087 2020-04-20 00:00:00+00 | 10.022128489940254 ``` ================================================ FILE: docs/ordered-aggregates.md ================================================ # Implementing aggregates that require ordered inputs PostgreSQL has a couple different ways of dealing with aggregates that require ordered inputs, [ordered set aggregates](https://www.postgresql.org/docs/current/functions-aggregate.html#FUNCTIONS-ORDEREDSET-TABLE), which guarantee ordered input but have non-intuitive syntax. You can also specify an ordering within an aggregate call (ie `SELECT array_agg(foo ORDER BY foo, bar)`), however, AFAIK the aggregate doesn't know and has no way of enforcing that this ordering has occurred other than balking if it got out of order data. Both of these have rather annoying syntax and require the *user* to understand that the input needs to be ordered for the aggregate to function. We decided that this was a poor choice. Instead, we decided to do the ordering ourselves *inside* the aggregate function. This means that the transition function for any of the aggregates that require ordering to function (`time_weight`, `counter_agg` etc) first have a transition function that simply builds up an array of inputs to the aggregate, then sorts the array and then processes the inputs in order. In addition, these aggregates have different semantics for combine and rollup than some of our other functions. 
Once the data has been sorted and processed, in general, these aggregates can *only* be combined in the traditional sense if they contain disjoint regions of time, in other words, only aggregates covering non-overlapping periods of time can be rolled up or combined. PostgreSQL doesn't have a way to guarantee that only non-overlapping time periods can be sent to each parallel worker, rows are distributed essentially as they are seen in a round robin. This means that the aggregates cannot be marked as parallel safe. So then, why do they need combine functions at all? Well, there is another time when combine functions are called and that is in the case of partitionwise aggregation. Partitionwise aggregation is used to perform part of the aggregation on a particular partition and then take the state and combine with aggregates from other partitions. Partitions are disjoint in time for us (this assumes some things and we should still have checks to make sure that we are not getting out of order / overlapping data). We believe the test for this is whether they have a combine function, not whether they are marked parallel safe. Therefore, we always mark these aggregates as parallel restricted rather than parallel safe, which hopefully will allow them to be used for partitionwise but not parallel aggregates. Partitionwise aggregation is a potential large optimization area for multinode so we wanted to make sure we could support that case. This also impacts the way that `rollup` can be called on these functions and the cases in which we should error. Note also that the `combine` and `rollup` functions for these aggregates must do essentially the same thing that the transition function does and build up an array of partial states, then order them and combine them at the end. This is a bit odd, but seems to be the best way. 
## Implementation example Here is the rollup aggregate for `TimeWeightSummary`: ```SQL , ignore CREATE AGGREGATE rollup(tws TimeWeightSummary) ( sfunc = time_weight_summary_trans, stype = internal, finalfunc = time_weight_final, combinefunc = time_weight_combine, serialfunc = time_weight_trans_serialize, deserialfunc = time_weight_trans_deserialize, parallel = restricted ); ``` ### Parallel safety The aggregate above is marked as `parallel = restricted`, which specifies that ["the function can be executed in parallel mode, but the execution is restricted to parallel group leader"](https://www.postgresql.org/docs/current/sql-createfunction.html). Note that only the value of the `parallel` parameter of the `CREATE AGGREGATE` call is used for determining the parallel safety of the aggregate; the parallel safety of the support functions that make up the aggregate is ignored when the aggregate is called. But all support functions should be marked parallel safe because, AFAIK, they are immutable and parallel safe in all cases, it is only when they are called in the correct ordering with the aggregate that they can cause problems / error if not used correctly. ### Merging on serialization In many cases the implementation of aggregate merging requires that the aggregates to be merged cover non-overlapping periods of time. To handle this while allowing the inputs to be potentially unordered, in the aggregate: - the transition function appends the input to a `Vec` - the final function sorts the transition state and merges all of the elements Storing all of the inputs ever seen in the transition state takes up a lot of memory, and makes the final function use a lot of compute. We can partially alleviate those issues by: - Adding a `combinefunc` that appends the second transition state `Vec` to the first one - Adding a `serialfunc` that: 1. Sorts and merges the transition state 2. 
Serializes the transition state - Adding a `deserialfunc` that deserializes the transition state These extra functions improve performance when the inputs are partitioned since each partition is combined, and then the partition combinations are combined again. `serialfunc` is called right before sending the current transition state from the parallel worker to the parent process, so it's the only place where we can do the sorting/merging of the transition state before it gets sent to the parent process. We do the merging in the parallel worker to reduce the amount of data sent from the parallel worker to the parent process. ![Each group of days is sorted and merged, then each group is sorted and merged](images/pgmerging.svg) This method doesn't work when two partitions contain overlapping time ranges. That shouldn't happen when the partitions are chunks of a TimescaleDB hypertable, but it could happen when the partitions cover overlapping segments of time (e.g. a table that uses declarative partitioning to partition a table using the hash of an ID). When two partitions contain overlapping time ranges, the implementation should catch that and give an error. Note that this approach means that `deserialfunc(serialfunc(x)) != x`, which is weird but doesn't seem to cause any problems. ================================================ FILE: docs/percentile_approximation.md ================================================ # Approximate Percentiles > [Why To Use Approximate Percentiles](#why-use)
> [API](#percentile-approx-api)
> [Advanced Usage: Algorithms and How to Choose](#advanced-usage)
###### A note on terminology: Technically, a percentile divides the group into 100 equally sized (by frequency) buckets, while a quantile would divide the group into an arbitrary number of buckets. We use percentile here with the recognition that while quantile is the technically more "correct" term for an arbitrary precision operation, percentile has become more commonly used to describe this type of function. ## Why to Use Approximate Percentiles There are really two things to cover here: 1) [why use percentiles at all](#why-use-percent) and 2) [why use *approximate* percentiles rather than exact percentiles](#why-approximate). To better understand this, we'll use the common example of a server that's running APIs for a company and tracking the response times for the various APIs it's running. So, for our example, we have a table something like this: ```SQL , non-transactional, ignore-output SET extra_float_digits = -3; -- use 12 digits of precision to reduce flakiness SET SESSION TIME ZONE 'UTC'; -- so we get consistent output CREATE TABLE response_times ( ts timestamptz, api_id int, user_id int, response_time_ms float ); -- and we'll make it a hypertable for ease of use in the rest of the example SELECT create_hypertable('response_times', 'ts'); ```
We'll also generate some data to work with here. And insert it into the table (expand for the generation script if you want to see it). ```SQL , non-transactional, ignore-output WITH apis as MATERIALIZED (SELECT generate_series(1, 12) as api_id), users as MATERIALIZED (SELECT generate_series(1, 30) as user_id), api_users as MATERIALIZED (SELECT * FROM apis JOIN users on api_id % 3 = user_id % 3), -- users use ~ 1/3 of apis times as MATERIALIZED (SELECT generate_series('2020-01-01'::timestamptz, '2020-01-02'::timestamptz, '1 minute'::interval) as ts), raw_joined as MATERIALIZED (SELECT * from api_users CROSS JOIN times ORDER BY api_id, user_id, ts), generated_data as MATERIALIZED ( SELECT ts + '5 min'::interval * test_random() as ts, api_id, user_id, 10 * api_id * user_id / (1+(extract(hour FROM ts)::int % api_id)) * test_random() as response_time FROM raw_joined ORDER BY api_id, user_id, ts) INSERT INTO response_times SELECT * FROM generated_data; ``` It's not the most representative of data sets, but it'll do and have some interesting features for us to look at.
--- ### Why use percentiles? In general, percentiles are useful for understanding the distribution of your data, for instance the 50% percentile, aka median of the data can be a more useful measure than average when there are outliers that would dramatically impact the average, but have a much smaller impact on the median. The median or 50th percentile means that in an ordered list of your data half of the data will be greater and half less, the 10% percentile would mean that 10% would fall below and 90% above the value returned and the 99th percentile would mean that 1% is above the value returned, 99% below. Outliers have less of an impact because their magnitude doesn't affect their percentile, only their order in the set, so the skew introduced by uncommon very large or very small values is reduced or eliminated. Let's look at an example with our generated data set, and lets say we want to find the worst apis, in an hour segment, so that we can identify poor performance, we'll start by using the Postgres [percentile_disc]() function for our percentiles: ```SQL SELECT time_bucket('1 h'::interval, ts) as bucket, api_id, avg(response_time_ms), percentile_disc(0.5) WITHIN GROUP (ORDER BY response_time_ms) as median FROM response_times GROUP BY 1, 2 ORDER BY 3 DESC LIMIT 15; ``` ```output, precision(2: 7) bucket | api_id | avg | median -----------------------+--------+---------------+-------------- 2020-01-01 00:00:00+00 | 12 | 993.878689655 | 751.68 2020-01-01 12:00:00+00 | 12 | 948.4199 | 714.6 2020-01-01 00:00:00+00 | 11 | 848.218549223 | 638 2020-01-01 22:00:00+00 | 11 | 824.517045075 | 606.32 2020-01-01 11:00:00+00 | 11 | 824.277392027 | 603.79 2020-01-01 00:00:00+00 | 9 | 739.073793103 | 562.95 2020-01-01 00:00:00+00 | 10 | 731.558894646 | 547.5 2020-01-01 18:00:00+00 | 9 | 724.052854758 | 536.22 2020-01-01 09:00:00+00 | 9 | 719.944816054 | 529.74 2020-01-01 20:00:00+00 | 10 | 696.328870432 | 500.8 2020-01-01 10:00:00+00 | 10 | 694.303472454 | 507.5 2020-01-01 
00:00:00+00 | 8 | 622.262145329 | 466.56 2020-01-01 08:00:00+00 | 8 | 597.849434276 | 437.12 2020-01-01 16:00:00+00 | 8 | 597.591488294 | 433.92 2020-01-02 00:00:00+00 | 11 | 583.857241379 | 383.35 ``` So, this returns some interesting results, maybe something like what those of you who read over our [data generation](#data-generation) code would expect. Given how we generate the data, we expect that the larger `api_ids` will have longer generated response times but that it will be cyclic with `hour % api_id`, so we can see that here. But what happens if we introduce some aberrant data points? They could have come from anywhere, maybe a user ran a weird query, maybe there's an odd bug in the code that causes some timings to get multiplied in an odd code path, who knows, here we'll introduce just 10 outlier points out of half a million: ```SQL , non-transactional, ignore-output WITH rand_points as (SELECT ts, api_id, user_id FROM response_times ORDER BY test_random() LIMIT 10) UPDATE response_times SET response_time_ms = 10000 * response_time_ms WHERE (ts, api_id, user_id) IN (SELECT * FROM rand_points); ``` ```SQL SELECT time_bucket('1 h'::interval, ts) as bucket, api_id, avg(response_time_ms), percentile_disc(0.5) WITHIN GROUP (ORDER BY response_time_ms) as median FROM response_times GROUP BY 1, 2 ORDER BY 3 DESC LIMIT 15; ``` ```output, precision(2: 7) bucket | api_id | avg | median ------------------------+--------+---------------+--------------- 2020-01-01 14:00:00+00 | 1 | 1658.34585977 | 53.46 2020-01-01 06:00:00+00 | 1 | 1226.37258765 | 53.77 2020-01-01 23:00:00+00 | 1 | 1224.1063 | 53.55 2020-01-01 00:00:00+00 | 12 | 993.878689655 | 751.68 2020-01-01 11:00:00+00 | 1 | 961.352933333 | 53.76 2020-01-01 12:00:00+00 | 12 | 948.4199 | 714.6 2020-01-01 00:00:00+00 | 11 | 848.218549223 | 638 2020-01-01 21:00:00+00 | 1 | 846.309280936 | 52.92 2020-01-01 04:00:00+00 | 1 | 845.378981636 | 54.78 2020-01-01 22:00:00+00 | 11 | 824.517045075 | 606.32 2020-01-01 
11:00:00+00 | 11 | 824.277392027 | 603.79 2020-01-01 00:00:00+00 | 9 | 739.073793103 | 562.95 2020-01-01 00:00:00+00 | 10 | 731.558894646 | 547.5 2020-01-01 18:00:00+00 | 9 | 724.052854758 | 536.22 2020-01-01 09:00:00+00 | 9 | 719.944816054 | 529.74 ``` Now, `avg` is giving horribly misleading results and not showing us the underlying patterns in our data anymore. But if I order by the `median` instead: ```SQL SELECT time_bucket('1 h'::interval, ts) as bucket, api_id, avg(response_time_ms), percentile_disc(0.5) WITHIN GROUP (ORDER BY response_time_ms) as median FROM response_times GROUP BY 1, 2 ORDER BY 4 DESC, 2, 1 LIMIT 15; ``` ```output, precision(2: 7) bucket | api_id | avg | median ------------------------+--------+---------------+--------------- 2020-01-01 00:00:00+00 | 12 | 993.878689655 | 751.68 2020-01-01 12:00:00+00 | 12 | 948.4199 | 714.6 2020-01-01 00:00:00+00 | 11 | 848.218549223 | 638 2020-01-01 22:00:00+00 | 11 | 824.517045075 | 606.32 2020-01-01 11:00:00+00 | 11 | 824.277392027 | 603.79 2020-01-01 00:00:00+00 | 9 | 739.073793103 | 562.95 2020-01-01 00:00:00+00 | 10 | 731.558894646 | 547.5 2020-01-01 18:00:00+00 | 9 | 724.052854758 | 536.22 2020-01-01 09:00:00+00 | 9 | 719.944816054 | 529.74 2020-01-01 10:00:00+00 | 10 | 694.303472454 | 507.5 2020-01-01 20:00:00+00 | 10 | 696.328870432 | 500.8 2020-01-01 00:00:00+00 | 8 | 622.262145329 | 466.56 2020-01-01 08:00:00+00 | 8 | 597.849434276 | 437.12 2020-01-01 16:00:00+00 | 8 | 597.591488294 | 433.92 2020-01-01 01:00:00+00 | 12 | 511.567512521 | 390.24 ``` I can see the pattern in my data again! The median was much better at dealing with outliers than `avg` was, and percentiles in general are much less noisy. This becomes even more obvious where we might want to measure the worst case scenario for users. 
So we might want to use the `max`, but often the 99th percentile value gives a better representation of the *likely* worst outcome for users than the max response time, which might be due to unrealistic parameters, an error, or some other non-representative condition. The maximum response time becomes something useful for engineers to investigate, ie to find errors or other weird outlier use cases, but less useful for, say, measuring overall user experience and how it changes over time. Both are useful for different circumstances, but often the 95th or 99th or other percentile outcome becomes the design parameter and what we measure success against. --- ### Why use *approximate* percentiles? One reason that percentiles are less frequently used than, say, average, min, max or other measures of a distribution is that they are significantly more expensive to perform (in terms of cpu and memory) than traditional aggregates. This is because an exact computation of the percentile (using say, Postgres' [`percentile_cont`]() or [`percentile_disc`]() ) requires the full data set as an ordered list. This is unlike, say, the maximum where I can scan my data set and just keep the largest value I see, for percentiles I need to order the entire data set in order to find the 99th percentile or the 50th percentile etc. This also means that the aggregates are not partializable or parallelizable; there isn't a great form that will allow me to compute the exact percentile on part of my data and combine that with information from another part and give me an exact percentile back. I need all the data, ordered appropriately in order to calculate the exact result. This is where approximation algorithms come into play: they allow for the calculation of a "good enough" percentile without using all of the data and ordering it before returning a result. 
There are multiple types of approximation algorithms, we've implemented two of them to start ([uddsketch]() and [tdigest]()), but if you're just getting started, we recommend trying out our [default implementation](), which uses the `uddsketch` implementation, but doesn't require twiddling of various knobs by the user. We believe this will be good enough for most cases, but if you run into an edge case or want different tradeoffs in terms of accuracy etc. we recommend reading [about the algorithms and tradeoffs below]() . Let's look back at our example from above and use our approximation algorithm alongside: ```SQL SELECT time_bucket('1 h'::interval, ts) as bucket, api_id, avg(response_time_ms), percentile_disc(0.5) WITHIN GROUP (ORDER BY response_time_ms) as true_median, approx_percentile(0.5, percentile_agg(response_time_ms)) as approx_median FROM response_times GROUP BY 1, 2 ORDER BY 5 DESC LIMIT 15; ``` ```output, precision(2: 7) bucket | api_id | avg | true_median | approx_median ------------------------+--------+---------------+---------------+--------------- 2020-01-01 00:00:00+00 | 12 | 993.878689655 | 751.68 | 764.998764437 2020-01-01 12:00:00+00 | 12 | 948.4199 | 714.6 | 717.572650369 2020-01-01 00:00:00+00 | 11 | 848.218549223 | 638 | 631.358694271 2020-01-01 22:00:00+00 | 11 | 824.517045075 | 606.32 | 611.475044532 2020-01-01 11:00:00+00 | 11 | 824.277392027 | 603.79 | 611.475044532 2020-01-01 00:00:00+00 | 9 | 739.073793103 | 562.95 | 573.566636623 2020-01-01 00:00:00+00 | 10 | 731.558894646 | 547.5 | 555.503056905 2020-01-01 18:00:00+00 | 9 | 724.052854758 | 536.22 | 538.008361239 2020-01-01 09:00:00+00 | 9 | 719.944816054 | 529.74 | 538.008361239 2020-01-01 20:00:00+00 | 10 | 696.328870432 | 500.8 | 504.654521865 2020-01-01 10:00:00+00 | 10 | 694.303472454 | 507.5 | 504.654521865 2020-01-01 00:00:00+00 | 8 | 622.262145329 | 466.56 | 473.368454447 2020-01-01 08:00:00+00 | 8 | 597.849434276 | 437.12 | 444.021967419 2020-01-01 16:00:00+00 | 8 | 
597.591488294 | 433.92 | 444.021967419 2020-01-01 01:00:00+00 | 12 | 511.567512521 | 390.24 | 390.674211779 ``` Pretty darn close! We can definitely still see the patterns in the data. Note that the calling conventions are a bit different for ours, partially because it's no longer an [ordered set aggregate](), and partially because we use [two-step aggregation](), see the [API documentation]() below for exactly how to use. The approximation algorithms can provide better performance than algorithms that need the whole sorted data set, especially on very large data sets that can't be easily sorted in memory. Not only that, but they are able to be incorporated into [continuous aggregates](), because they have partializable forms, can be used in [parallel]() and [partitionwise]() aggregation. They are used very frequently in continuous aggregates as that's where they give the largest benefit over the usual Postgres percentile algorithms, which can't be used at all because they require the entire ordered data set to function. Let's do this with our example, we can't use `percentile_disc` anymore as ordered set aggregates are not supported. 
```SQL , non-transactional, ignore-output CREATE MATERIALIZED VIEW response_times_hourly WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('1 h'::interval, ts) as bucket, api_id, avg(response_time_ms), percentile_agg(response_time_ms) FROM response_times GROUP BY 1, 2; ``` Note that we only do the aggregation step of our [two-step aggregation](), we'll save the accessor step for our selects from the view, and we'll start by just getting the same data as our previous example like so: ```SQL SELECT bucket, api_id, avg, approx_percentile(0.5, percentile_agg) as approx_median FROM response_times_hourly ORDER BY 4 DESC, 2, 1 LIMIT 15; ``` ```output, precision(2: 7) bucket | api_id | avg | approx_median ------------------------+--------+---------------+--------------- 2020-01-01 00:00:00+00 | 12 | 993.878689655 | 764.998764437 2020-01-01 12:00:00+00 | 12 | 948.4199 | 717.572650369 2020-01-01 00:00:00+00 | 11 | 848.218549223 | 631.358694271 2020-01-01 11:00:00+00 | 11 | 824.277392027 | 611.475044532 2020-01-01 22:00:00+00 | 11 | 824.517045075 | 611.475044532 2020-01-01 00:00:00+00 | 9 | 739.073793103 | 573.566636623 2020-01-01 00:00:00+00 | 10 | 731.558894646 | 555.503056905 2020-01-01 09:00:00+00 | 9 | 719.944816054 | 538.008361239 2020-01-01 18:00:00+00 | 9 | 724.052854758 | 538.008361239 2020-01-01 10:00:00+00 | 10 | 694.303472454 | 504.654521865 2020-01-01 20:00:00+00 | 10 | 696.328870432 | 504.654521865 2020-01-01 00:00:00+00 | 8 | 622.262145329 | 473.368454447 2020-01-01 08:00:00+00 | 8 | 597.849434276 | 444.021967419 2020-01-01 16:00:00+00 | 8 | 597.591488294 | 444.021967419 2020-01-01 00:00:00+00 | 6 | 500.610103448 | 390.674211779 ``` So, that's nifty, and much faster, especially for large data sets. 
But what's even cooler is I can do aggregates over the aggregates and speed those up, let's look at the median by `api_id`: ```SQL SELECT api_id, approx_percentile(0.5, rollup(percentile_agg)) as approx_median FROM response_times_hourly GROUP BY api_id ORDER BY api_id; ``` ```output api_id | approx_median --------+--------------- 1 | 54.5702804443 2 | 80.1171187405 3 | 103.491515519 4 | 91.0573557571 5 | 110.331520385 6 | 117.623597735 7 | 110.331520385 8 | 117.623597735 9 | 133.685458898 10 | 117.623597735 11 | 125.397626136 12 | 133.685458898 ``` You'll notice that I didn't include the average response time here, that's because `avg` is not a [two-step aggregate](), and doesn't actually give you the average if you stack calls using it. But it turns out, we can derive the true average from the sketch we use to calculate the approximate percentiles! (We call that accessor function `mean` because there would otherwise be odd conflicts with `avg` in terms of how they're called). ```SQL SELECT api_id, mean(rollup(percentile_agg)) as avg, approx_percentile(0.5, rollup(percentile_agg)) as approx_median FROM response_times_hourly GROUP BY api_id ORDER BY api_id; ``` ```output, precision(1: 7) api_id | avg | approx_median --------+---------------+--------------- 1 | 358.974815406 | 54.5702804443 2 | 116.208743234 | 80.1171187405 3 | 151.194417418 | 103.491515519 4 | 150.963527481 | 91.0573557571 5 | 180.906869604 | 110.331520385 6 | 202.234328036 | 117.623597735 7 | 203.056659681 | 110.331520385 8 | 210.823512283 | 117.623597735 9 | 250.775971756 | 133.685458898 10 | 239.834855656 | 117.623597735 11 | 267.750932477 | 125.397626136 12 | 256.252763567 | 133.685458898 ``` We have several other accessor functions, including `error` which returns the maximum relative error for the percentile estimate, `num_vals` which returns the number of elements in the estimator, and perhaps the most interesting one, `approx_percentile_rank`, which gives the hypothetical percentile for a 
given value. Let's say we really don't want our apis to go over 1s in response time (1000 ms), we can use that to figure out what fraction of users waited over a second for each api: ```SQL SELECT api_id, ((1 - approx_percentile_rank(1000, rollup(percentile_agg))) * 100)::numeric(6,2) as percent_over_1s FROM response_times_hourly GROUP BY api_id ORDER BY api_id; ``` ```output api_id | percent_over_1s --------+----------------- 1 | 0.07 2 | 0.00 3 | 0.00 4 | 0.40 5 | 1.56 6 | 2.54 7 | 2.87 8 | 3.30 9 | 4.56 10 | 4.54 11 | 5.90 12 | 4.97 ``` ## API Aggregate Functions > - [percentile_agg (point form)](#point-form) > - [rollup (summary form)](#summary-form) Accessor Functions > - [error](#error) > - [mean](#mean) > - [num_vals](#num-vals) > - [approx_percentile](#approx_percentile) > - [approx_percentile_rank](#approx_percentile-at-value) --- ## **percentile_agg (point form)** ```SQL ,ignore percentile_agg( value DOUBLE PRECISION ) RETURNS UddSketch ``` This is the default percentile aggregation function. Under the hood, it uses the [UddSketch algorithm](/docs/uddsketch.md) with 200 buckets and an initial max error of 0.001. This should be good for most common use cases of percentile approximation. For more advanced usage of the uddsketch algorithm or use cases for other percentile approximation algorithms see [advanced usage](#advanced-usage). This is the aggregation step of the [two-step aggregate](/docs/two-step_aggregation.md), it is usually used with the [approx_percentile()](#approx_percentile) accessor function in order to extract an approximate percentile, however it is in a form that can be re-aggregated using the [summary form](#summary-form) of the function and any of the other [accessor functions](#accessor-functions). ### Required Arguments |Name| Type |Description| |---|---|---| | `value` | `DOUBLE PRECISION` | Column to aggregate.
### Returns |Column|Type|Description| |---|---|---| | `percentile_agg` | `UddSketch` | A UddSketch object which may be passed to other percentile approximation APIs| Because the `percentile_agg` function uses the [UddSketch algorithm](/docs/uddsketch.md), it returns the UddSketch data structure for use in further calls.
### Sample Usages Get the approximate first percentile using the `percentile_agg()` point form plus the [`approx_percentile`](#approx_percentile) accessor function. ```SQL SELECT approx_percentile(0.01, percentile_agg(data)) FROM generate_series(0, 100) data; ``` ```output approx_percentile ------------------- 0.999 ``` They are often used to create [continuous aggregates]() after which we can use multiple [accessors](#accessor-functions) for [retrospective analysis](/docs/two-step_aggregation.md#retrospective-analysis). ```SQL ,ignore CREATE MATERIALIZED VIEW foo_hourly WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('1 h'::interval, ts) as bucket, percentile_agg(value) as pct_agg FROM foo GROUP BY 1; ``` --- ## **rollup (summary form)** ```SQL ,ignore rollup( sketch uddsketch ) RETURNS UddSketch ``` This will combine multiple outputs from the [point form](#point-form) of the `percentile_agg()` function, this is especially useful for re-aggregation in the [continuous aggregate]() context (ie bucketing by a larger [`time_bucket`](), or re-grouping on other dimensions included in an aggregation). ### Required Arguments |Name| Type |Description| |---|---|---| | `sketch` | `UddSketch` | The already constructed uddsketch from a previous [percentile_agg()](#point-form) call. |
### Returns |Column|Type|Description| |---|---|---| | `uddsketch` | `UddSketch` | A UddSketch object which may be passed to other UddSketch APIs. | Because the `percentile_agg` function uses the [UddSketch algorithm](/docs/uddsketch.md), `rollup` returns the UddSketch data structure for use in further calls.
### Sample Usages Let's presume we created the [continuous aggregate]() in the [point form example](#point-form-examples): We can then use the `rollup` function to re-aggregate the results from the `foo_hourly` view and the [`approx_percentile`](#approx_percentile) accessor function to get the 95th and 99th percentiles over each day: ```SQL , ignore SELECT time_bucket('1 day'::interval, bucket) as bucket, approx_percentile(0.95, rollup(pct_agg)) as p95, approx_percentile(0.99, rollup(pct_agg)) as p99 FROM foo_hourly GROUP BY 1; ``` --- ## **error** ```SQL ,ignore error(sketch UddSketch) RETURNS DOUBLE PRECISION ``` This returns the maximum relative error that a percentile estimate will have (relative to the correct value). This means the actual value will fall in the range defined by `approx_percentile(sketch) +/- approx_percentile(sketch)*error(sketch)`. ### Required Arguments |Name|Type|Description| |---|---|---| | `sketch` | `UddSketch` | The sketch to determine the error of, usually from a [`percentile_agg()`](#aggregate-functions) call. |
### Returns |Column|Type|Description| |---|---|---| | `error` | `DOUBLE PRECISION` | The maximum relative error of any percentile estimate. |
### Sample Usages ```SQL SELECT error(percentile_agg(data)) FROM generate_series(0, 100) data; ``` ```output error ------- 0.001 ``` --- ## **mean** ```SQL ,ignore mean(sketch UddSketch) RETURNS DOUBLE PRECISION ``` Get the exact average of all the values in the percentile estimate. (Percentiles returned are estimates, the average is exact.) ### Required Arguments |Name|Type|Description| |---|---|---| | `sketch` | `UddSketch` | The sketch to extract the mean value from, usually from a [`percentile_agg()`](#aggregate-functions) call. |
### Returns |Column|Type|Description| |---|---|---| | `mean` | `DOUBLE PRECISION` | The average of the values in the percentile estimate. |
### Sample Usage ```SQL SELECT mean(percentile_agg(data)) FROM generate_series(0, 100) data; ``` ```output mean ------ 50 ``` ## **num_vals** ```SQL ,ignore num_vals(sketch UddSketch) RETURNS DOUBLE PRECISION ``` Get the number of values contained in a percentile estimate. ### Required Arguments |Name|Type|Description| |---|---|---| | `sketch` | `UddSketch` | The sketch to extract the number of values from, usually from a [`percentile_agg()`](#aggregate-functions) call. |
### Returns |Column|Type|Description| |---|---|---| | `uddsketch_count` | `DOUBLE PRECISION` | The number of values in the percentile estimate |
### Sample Usage ```SQL SELECT num_vals(percentile_agg(data)) FROM generate_series(0, 100) data; ``` ```output num_vals ----------- 101 ``` --- --- ## **approx_percentile** ```SQL ,ignore approx_percentile( percentile DOUBLE PRECISION, sketch uddsketch ) RETURNS DOUBLE PRECISION ``` Get the approximate value at a percentile from a percentile estimate. ### Required Arguments |Name|Type|Description| |---|---|---| | `approx_percentile` | `DOUBLE PRECISION` | The desired percentile (0.0-1.0) to approximate. | | `sketch` | `UddSketch` | The sketch to compute the approx_percentile on, usually from a [`percentile_agg()`](#aggregate-functions) call. |
### Returns |Column|Type|Description| |---|---|---| | `approx_percentile` | `DOUBLE PRECISION` | The estimated value at the requested percentile. |
### Sample Usage ```SQL SELECT approx_percentile(0.01, percentile_agg(data)) FROM generate_series(0, 100) data; ``` ```output approx_percentile ------------------- 0.999 ``` --- ## **approx_percentile_rank** ```SQL ,ignore approx_percentile_rank( value DOUBLE PRECISION, sketch UddSketch ) RETURNS DOUBLE PRECISION ``` Estimate what percentile a given value would be located at in a UddSketch. ### Required Arguments |Name|Type|Description| |---|---|---| | `value` | `DOUBLE PRECISION` | The value to estimate the percentile of. | | `sketch` | `UddSketch` | The sketch to compute the percentile on. |
### Returns |Column|Type|Description| |---|---|---| | `approx_percentile_rank` | `DOUBLE PRECISION` | The estimated percentile associated with the provided value. |
### Sample Usage ```SQL SELECT approx_percentile_rank(99, percentile_agg(data)) FROM generate_series(0, 100) data; ``` ```output approx_percentile_rank ------------------------ 0.985148514851 ``` ## Advanced Usage: Percentile Approximation Algorithms and How to Choose While the simple `percentile_agg` interface will be sufficient for many users, we do provide more specific APIs for advanced users who want more control of how their percentile approximation is computed and how much space the intermediate representation uses. We currently provide implementations of the following percentile approximation algorithms: - [T-Digest](/docs/tdigest.md) – This algorithm buckets data more aggressively toward the center of the quantile range, giving it greater accuracy near the tails (i.e. 0.001 or 0.995). - [UddSketch](/docs/uddsketch.md) – This algorithm uses exponentially sized buckets to guarantee the approximation falls within a known error range, relative to the true discrete percentile. There are different tradeoffs that each algorithm makes, and different use cases where each will shine. The doc pages above each link to the research papers fully detailing the algorithms if you want all the details. However, at a higher level, here are some of the differences to consider when choosing an algorithm: 1) First off, it's interesting to note that the formal definition for a percentile is actually imprecise, and there are different methods for determining what the true percentile actually is. In Postgres, given a target percentile 'p', `percentile_disc` will return the smallest element of a set such that 'p' percent of the set is less than that element, while `percentile_cont` will return an interpolated value between the two nearest matches for 'p'. 
The difference here isn't usually that interesting in practice, but if it matters to your use case, then keep in mind that TDigest will approximate the continuous percentile while UddSketch provides an estimate of the discrete value. 2) It's also important to consider the types of percentiles you're most interested in. In particular, TDigest is optimized to trade off more accurate estimates at the extremes with weaker estimates near the median. If your work flow involves estimating 99th percentiles, this is probably a good trade off. However if you're more concerned about getting highly accurate median estimates, UddSketch is probably a better fit. 3) UddSketch has a stable bucketing function, so it will always return the same quantile estimate for the same underlying data, regardless of how it is ordered or reaggregated. TDigest, on the other hand, builds up incremental buckets based on the average of nearby points, which will result in (usually subtle) differences in estimates based on the same data, unless the order and batching of the aggregation is strictly controlled (which can be difficult to do in Postgres). Therefore, if having stable estimates is important to you, UddSketch will likely be required. 4) Trying to calculate precise error bars for TDigest can be difficult, especially when merging multiple subdigests into a larger one (this can come about either through summary aggregation or just parallelization of the normal point aggregate). If being able to tightly characterize your error is important, UddSketch will likely be the desired algorithm. 5) That being said, the fact that UddSketch uses exponential bucketing to provide a guaranteed relative error can cause some wildly varying absolute errors if the data set covers a large range. For instance if the data is evenly distributed over the range [1,100], estimates at the high end of the percentile range would have about 100 times the absolute error of those at the low end of the range. 
This gets much more extreme if the data range is [0,100]. If having a stable absolute error is important to your use case, consider TDigest. 6) While both implementations will likely get smaller and/or faster with future optimizations, in general UddSketch will end up with a smaller memory footprint than TDigest, and a correspondingly smaller disk footprint for any continuous aggregates. This is one of the main reasons that the default `percentile_agg` uses UddSketch, and is a pretty good reason to prefer that algorithm if your use case doesn't clearly benefit from TDigest. Regardless of the algorithm, the best way to improve the accuracy of your percentile estimates is to increase the number of buckets, and UddSketch gives you more leeway to do so. ================================================ FILE: docs/release.md ================================================ # Release and build procedures We build the timescaledb_toolkit extension using Cargo, but we have many higher-level tasks in need of automation: - Build, lint, and test with particular flags in multiple environments - Extract SQL examples from documentation and test - Test upgrades - Installation - Publish a release - Make a container image to run all the above on The rest of this document elaborates on each of those. But first.. ## Dependency management Ideally, all dependencies would be specified in just one place. But that's not quite feasible. Cargo.toml files capture the crate dependencies. The rest are needed by the six shell scripts used to solve the above list of problems. We configure those in `tools/dependencies.sh`. ## Build, lint, and test `tools/build` is the relatively simple shell script that owns the cargo flags for running clippy, running tests, installing, and testing upgrades. The latter two are arguably out of place here. Testing upgrades is now handled by `testbin` (below), but the version here was useful for Mac. That has now degraded as it would need to support a third pgrx... 
Installing is only relevant for local development. ## Extract SQL examples from documentation and test `tools/sql-doctester` is a Rust program which extracts example SQL programs and expected output from documentation, runs the programs, and asserts their output matches what was expected. The intent here is merely to prevent sample code from bitrotting, but some functionality is currently only tested here. ## Test upgrades We include in each release a set of scripts to upgrade an installation from a set of previous versions. We test these upgrades by installing a supported old version, materializing some data, running the upgrade script, and asserting the extension can still load the old data. `tools/update-tester` is a Rust program which loads tests from `tests/update` to implement the materialize and verify steps. It needs to know which version each function was stabilized in, and we store that information in `extension/src/stabilization_info.rs` (also used by post-install, see below). `tools/testbin` is a shell script that uses `update-tester` to test upgrades between released binaries (deb and rpm). ## Installation Installation is a two-step process currently duplicated in three places. The two steps are: 1. `cargo pgrx install --release` OR `cargo pgrx package` 2. `tools/post-install` These steps are repeated in: 1. `Readme.md` 2. `tools/build` 3. `toolkit/package-deb.sh` and `toolkit/package-rpm.sh` `Readme.md` could simply recommend running `tools/build install`. `package-deb.sh` and `package-rpm.sh` could run `tools/build package` (which doesn't yet exist). `cargo pgrx install` installs the extension into the directory specified by `pg_config`. `cargo pgrx package` installs into a directory under `$CARGO_TARGET_DIR` where we pick it up and pack it into deb and rpm packages. 
`tools/post-install` performs miscellaneous install-time procedures: - finalize control file - rename `timescaledb_toolkit.so` to include the version number - generate update scripts `tools/post-install` needs to know which version each function was stabilized in, and we store that information in `extension/src/stabilization_info.rs`. ## Publish a release `tools/release` automates all the steps of our release process. We run it via github action (`.github/workflows/release.yml`). `tools/release` creates and pushes a release branch and tag, runs tests, starts a package build, prepares the `main` branch for the next release, and creates an issue so we don't forget some tasks not yet automated. The package build happens in a different repository for reasons described in comments at the top of `tools/release`. Over in that repository, we have `.github/workflows/toolkit-package.yml` which runs `toolkit/docker-run-package.sh` to build packages and `toolkit/upload-packages.sh` to upload them to PackageCloud. `toolkit/docker-run-package.sh` runs `toolkit/package-deb.sh` and `toolkit/package-rpm.sh` in various container images to build packages for those platforms. Which platforms we build for is controlled in the `yml` action file. ### Usage: 1. https://github.com/timescale/timescaledb-toolkit/actions/workflows/release.yml 2. Click "Run workflow" 3. Fill out the form 4. Be sure to replace `-n` with `-push`! We can replace this last one with a checkbox: - unchecked: run with neither `-n` nor `-push` - checked: run with `-push` The script has three modes: - `-n`: print what would be done without doing anything - `-push`: do everything including pushing to Github and PackageCloud - neither: do all the work (branch, edit, test, package, upgrade-test), but don't push anywhere The third mode is the most useful but it is not available from the Github action. Very sad. We need to fix that. 
### Resume after failure Up until the packaging step, just rerun the release action after the problem is resolved. If packages have been published, the choices are: - do the rest of what the script does manually - increment the patch revision (1.3.X) and start another release An obvious improvement would be to teach `tools/release` to resume at a specific step, something like `tools/release --start-at-step 7`. It would need to verify that the previous steps were actually done and bail out if not. Once packaging is no longer asynchronous in the other repository, `tools/release` can simply be taught to figure out which steps are done all on its own, without an operator having to tell it where to resume. ### Debugging We run `tools/release` with the shell's `-x` option so it prints each command it runs. We redirect the standard error stream to the standard output because Docker will otherwise separate them such that error messages may appear far from related output. So, when something goes wrong, it is easy to pinpoint exactly which part of the script failed and how. Things that can go wrong: #### Transient network hiccough This can happen at almost any stage. A simple retry might be the easiest way to see if the issue is transient. If it's not, options are limited: - wait - complain, then wait #### cargo install cargo-edit - Is crates.io down? - Has cargo-edit vanished from crates.io? #### Install gh - The version we use is gone. Find the latest and figure out whether all our usage has been invalidated by incompatible changes. Be careful! - Or, just squirrel away a copy of the old binary and keep rolling, until the underlying APIs it uses break. - The checksum doesn't match. Did they break it? Why would they do such a thing? Were they hacked? Probably should go ahead and update at this point. #### `extension/timescaledb_toolkit.control` problems `tools/release` edits this file, so it is very careful that the file looks the way it expects. 
It is and should remain very picky. If we've made some unexpected edits, it will complain. If the edits were erroneous, fix them; else, you have to teach `tools/release` what you've done. One of the things it checks is the `upgradeable_from` line. Most importantly, it expects that patch releases are upgradeable from the previous version in the same minor version (e.g. 1.3.1 is upgradeable from 1.3.0). #### `Changelog.md` problems `tools/release` ensures the version being released has Changelog.md entries. It also requires some particular boiler-plate text at the top to know where to make its edits. The boiler-plate is arbitrary text intended for consumption by the development team. If we change that text, `tools/release` needs to know about it. #### Tests fail Oh boy! Test output is logged. `tools/build test-extension` shouldn't fail since it already passed when the release commit was merged to master. You're not trying to release a commit that didn't pass CI, are you? But, the upgrade tests are being run for the first time! So those might break. We should run `tools/release --no-push` nightly. In the meantime... to the debugger! #### git push fails We've had branch permission problems before... Is the authentication token working? #### `gh` fails Is GitHub API struggling? Is the authentication token working? Has the packaging action in the `release-build-scripts` repository gone missing? ## Make a container image to run all the above on `.github/workflows/toolkit-image.yml` configures the GitHub action which builds all our supported container images. One image is special: debian-11-amd64. This is the one we run all our GitHub actions on. 
`docker/ci/Dockerfile` is the entry-point and it runs `docker/ci/setup.sh` to do the work: - Create the build user - Install necessary build tools and libraries - Install postgresql and timescaledb - Install `gh` github command-line tool used by `tools/release` - Install Rust and PGRX - Pre-fetch toolkit's crate dependencies to minimize work done at CI time ## Maintenance tasks So, we've automated build and release! ONCE AND FOR ALL. Right? As the great Balki Bartokomous often said: of course not; don't be ridiculous. These are the sorts of things we have to do from time to time: - Update Rust. It moves pretty fast. - Update PGRX. It moves even faster. - Update other crates. `cargo audit` and `cargo update` are our friends. - Update OS versions. Labels such as `rockylinux:9` eventually point to something different or disappear entirely. The former actually surprised us once already. ### Things we update blindly We install the latest version of these every time, so they may change in surprising ways at inopportune times. - fpm: It's a Ruby script with lots of dependencies and we install the latest version and it bit us on the ass once already. We use it because someone set it up for us a long time ago and no one has had the chance to sit down and figure out how to write an RPM spec file. Shouldn't take more than a few hours, just haven't done it... - postgresql: We install the latest version of a fixed set of major versions, so this should be very unlikely to break on us. Listed for completeness. - timescaledb: We test with their master branch nightly, so we should be ahead of this one. ### Unknown Unknowns lol They're inevitable. You just need a good nose for debugging. 
================================================ FILE: docs/rolling_average_api_working.md ================================================ # Info dump on rolling average APIs # Rolling averages are currently nasty to do with timescaledb (user complaint https://news.ycombinator.com/item?id=27051005). In our timevector API we will eventually provide a function like ```SQL , ignore moving_average(window => '30 minutes', slide => '5 minutes', data) ``` However, because set-returning aggregates cannot exist in Postgres, this will not work outside of the timevector API. Currently, doing rolling average properly requires windowed aggregates. In base SQL it is a real PITA because you have to do sum and count separately and then divide them yourself. ```SQL , ignore SELECT time_bucket('5 minutes', time) as bucket, sum(sum(value)) OVER thirty_minutes / sum(count(value)) OVER thirty_minutes as rolling_average FROM data GROUP BY 1 WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes', time) RANGE '30 minutes' PRECEDING); ``` Ideally, to do a thirty-minute rolling average every 5 minutes we would provide an API like: ```SQL , ignore SELECT time_bucket('5 minutes', time) as bucket, rolling_average('5 minutes', value) OVER thirty_minutes FROM data GROUP BY bucket WINDOW thirty_minutes as (ORDER BY ts RANGE '30 minutes' PRECEDING); ``` However, this once again runs into postgres limitations: we need to aggregate over the `value` column in order for this query to be correctly executed; the `rolling_average()` executes strictly after the `GROUP BY`, and will only see things within its 5-minute group. To fix this issue we need a separate aggregation step. 
First we'll aggregate the data into 5-minute summaries, then we'll re-aggregate over 30-minute windows of summaries ```SQL , ignore SELECT time_bucket('5 minutes'::interval, time) as bucket, average( rolling(stats_agg(value)) OVER thirty_minutes ) FROM foo GROUP BY bucket WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING); ``` While we could create a dedicated `rolling_average()` function used like ```SQL , ignore SELECT time_bucket('5 minutes'::interval, time) as bucket, rolling_average(stats_agg(value)) OVER thirty_minutes FROM foo GROUP BY bucket WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING); ``` for non-trivial cases, where you want to gather multiple statistics over the same data, this ends up significantly less readable, compare ```SQL , ignore SELECT time_bucket('5 minutes'::interval, ts) as bucket, rolling_average(stats_agg(value)) OVER thirty_minutes, rolling_stddev(stats_agg(value)) OVER thirty_minutes, rolling_approx_percentile(0.1, percentile_agg(val1)) OVER thirty_minutes, rolling_approx_percentile(0.9, percentile_agg(val1)) OVER thirty_minutes FROM foo GROUP BY 1 WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING); ``` to ```SQL , ignore SELECT bucket, average(rolling_stats), stddev(rolling_stats), approx_percentile(0.1, rolling_percentile), approx_percentile(0.9, rolling_percentile) FROM ( SELECT time_bucket('5 minutes'::interval, ts) as bucket, rolling(stats_agg(value)) OVER thirty_minutes as rolling_stats, rolling(percentile_agg(value)) OVER thirty_minutes as rolling_percentile FROM foo GROUP BY 1 WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING) ) aggs; ``` since in the real world, and all our documentation, we expect to see multi-statistic queries, we plan to optimize for readability in this case, and have separate rollup and query 
steps. Separating out the re-aggregation step also allows for more powerful composition, for instance: ```SQL , ignore SELECT bucket, average(rolling_stats) as rolling_average, average(rolling(rolling_stats) OVER (ORDER BY bucket)) AS cumulative_average, average(rolling(rolling_stats) OVER ()) as full_set_average, average(rolling_stats) / average(rolling(rolling_stats) OVER ()) as normalized_average FROM ( SELECT time_bucket('5 minutes'::interval, ts) as bucket, rolling(stats_agg(value)) OVER thirty_minutes as rolling_stats FROM foo GROUP BY 1 WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING) ) aggs; ``` ### A note on style and semantics ```SQL , ignore SELECT bucket, average(rolling_stats), stddev(rolling_stats), approx_percentile(0.1, rolling_percentile), approx_percentile(0.9, rolling_percentile) FROM ( SELECT time_bucket('5 minutes'::interval, ts) as bucket, rolling(stats_agg(value)) OVER thirty_minutes as rolling_stats, rolling(percentile_agg(value)) OVER thirty_minutes as rolling_percentile FROM foo GROUP BY 1 WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING) ) aggs; ``` is equivalent to ```SQL , ignore WITH aggs as ( SELECT time_bucket('5 minutes'::interval, ts) as bucket, rolling(stats_agg(value)) OVER thirty_minutes as rolling_stats, rolling(percentile_agg(value)) OVER thirty_minutes as rolling_percentile FROM foo GROUP BY 1 WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING) ) SELECT bucket, average(rolling_stats), stddev(rolling_stats), approx_percentile(0.1, rolling_percentile), approx_percentile(0.9, rolling_percentile) FROM aggs; ``` which is also equivalent to, for understanding the order of operations here ```SQL , ignore WITH aggs as ( SELECT time_bucket('5 minutes'::interval, ts) as bucket, stats_agg(value), percentile_agg(value) FROM foo GROUP BY 1 ), rolling_aggs as ( SELECT bucket 
rolling(stats_agg) OVER thirty_minutes as rolling_stats, rolling(percentile_agg) OVER thirty_minutes as rolling_percentile FROM aggs WINDOW thirty_minutes as (ORDER BY bucket RANGE '30 minutes' PRECEDING) ) SELECT bucket, average(rolling_stats), stddev(rolling_stats), approx_percentile(0.1, rolling_percentile), approx_percentile(0.9, rolling_percentile) FROM rolling_aggs; ``` which is also equivalent to: ```SQL , ignore SELECT bucket, average(rolling_stats), stddev(rolling_stats), approx_percentile(0.1, rolling_percentile), approx_percentile(0.9, rolling_percentile) FROM ( SELECT bucket, rolling(stats_agg) OVER thirty_minutes as rolling_stats, rolling(percentile_agg) OVER thirty_minutes as rolling_percentile FROM ( SELECT time_bucket('5 minutes'::interval, ts) as bucket, stats_agg(value), percentile_agg(value) FROM foo GROUP BY 1 ) aggs WINDOW thirty_minutes as (ORDER BY time_bucket('5 minutes'::interval, ts) RANGE '30 minutes' PRECEDING) ) rolling_aggs; ``` ================================================ FILE: docs/state_agg.md ================================================ # State Aggregation [experimental](/docs/README.md#tag-notes) # Test table Examples below are tested against the following tables: ```SQL ,non-transactional SET TIME ZONE 'UTC'; CREATE TABLE states_test(ts TIMESTAMPTZ, state TEXT); INSERT INTO states_test VALUES ('2020-01-01 00:00:00+00', 'START'), ('2020-01-01 00:00:11+00', 'OK'), ('2020-01-01 00:01:00+00', 'ERROR'), ('2020-01-01 00:01:03+00', 'OK'), ('2020-01-01 00:02:00+00', 'STOP'); CREATE TABLE states_test_2(ts TIMESTAMPTZ, state TEXT); INSERT INTO states_test_2 VALUES ('2019-12-31 00:00:00+00', 'START'), ('2019-12-31 00:00:11+00', 'OK'), ('2019-12-31 00:02:00+00', 'STOP'), ('2019-12-31 00:01:03+00', 'OK'); CREATE TABLE states_test_3(ts TIMESTAMPTZ, state TEXT); INSERT INTO states_test_3 VALUES ('2019-12-31 00:00:11+00', 'UNUSED'), ('2019-12-31 00:01:00+00', 'START'); CREATE TABLE states_test_4(ts TIMESTAMPTZ, state BIGINT); INSERT INTO 
states_test_4 VALUES ('2020-01-01 00:00:00+00', 4), ('2020-01-01 00:00:11+00', 51351), ('2020-01-01 00:01:00+00', 2), ('2020-01-01 00:01:03+00', 51351), ('2020-01-01 00:02:00+00', -9); CREATE TABLE states_test_5(ts TIMESTAMPTZ, state BIGINT); -- states_test with integer states INSERT INTO states_test_5 VALUES ('2020-01-01 00:00:00+00', 4), ('2020-01-01 00:00:11+00', 51351), ('2020-01-01 00:01:00+00', 2), ('2020-01-01 00:02:03+00', 51351), ('2020-01-01 00:02:05+00', -9); CREATE TABLE states_test_6(ts TIMESTAMPTZ, state BIGINT); -- states_test_3 with integer states INSERT INTO states_test_6 VALUES ('2019-12-31 00:00:11+00', 456789), ('2019-12-31 00:01:00+00', 4); ``` ## Functions ### duration_in Compute the amount of time spent in a state as INTERVAL. ```SQL SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'ERROR') FROM states_test; ``` ```output interval ---------- 00:00:03 ``` ```SQL SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 2) FROM states_test_4; ``` ```output interval ---------- 00:00:03 ``` Extract as number of seconds: ```SQL SELECT EXTRACT(epoch FROM toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'ERROR') )::INTEGER FROM states_test; ``` ```output seconds --------- 3 ``` #### duration_in for a range ```SQL SELECT duration_in(state_agg(ts, state), 'OK', '2020-01-01 00:01:00+00', '2 days') FROM states_test; ``` ```output duration_in ------------- 00:00:57 ``` ```SQL SELECT duration_in(state_agg(ts, state), 'OK', '2020-01-01 00:01:00+00', NULL) FROM states_test; ``` ```output duration_in ------------- 00:00:57 ``` ```SQL SELECT duration_in(state_agg(ts, state), 'OK', '2020-01-01 00:01:00+00') FROM states_test; ``` ```output duration_in ------------- 00:00:57 ``` ```SQL SELECT duration_in(state_agg(ts, state), 51351, '2020-01-01 00:01:00+00', '2 days') FROM states_test_4; ``` ```output duration_in ------------- 00:00:57 ``` ```SQL SELECT 
duration_in(state_agg(ts, state), 51351, '2020-01-01 00:01:00+00', NULL) FROM states_test_4; ``` ```output duration_in ------------- 00:00:57 ``` ```SQL SELECT duration_in(state_agg(ts, state), 'OK', '2020-01-01 00:00:15+00', '30 seconds') FROM states_test; ``` ```output duration_in ------------- 00:00:30 ``` ```SQL SELECT duration_in(state_agg(ts, state), 51351, '2020-01-01 00:00:15+00', '1 minute 1 second') FROM states_test_4; ``` ```output duration_in ------------- 00:00:58 ``` ```SQL SELECT duration_in(state_agg(ts, state), 'OK', '2020-01-01 00:00:15+00', '1 minute 1 second') FROM states_test; ``` ```output duration_in ------------- 00:00:58 ``` ```SQL SELECT (SELECT state_agg(ts, state) FROM states_test) -> duration_in('OK'::text, '2020-01-01 00:00:15+00', '1 minute 1 second'); ``` ```output ?column? ------------- 00:00:58 ``` ```SQL SELECT (SELECT state_agg(ts, state) FROM states_test) -> duration_in('OK'); ``` ```output ?column? ------------- 00:01:46 ``` ### into_values ```SQL SELECT state, duration FROM toolkit_experimental.into_values( (SELECT toolkit_experimental.compact_state_agg(ts, state) FROM states_test)) ORDER BY state, duration; ``` ```output state | duration -------+----------- ERROR | 00:00:03 OK | 00:01:46 START | 00:00:11 STOP | 00:00:00 ``` ```SQL SELECT state, duration FROM into_int_values( (SELECT state_agg(ts, state) FROM states_test_4)) ORDER BY state, duration; ``` ```output state | duration -------+----------- -9 | 00:00:00 2 | 00:00:03 4 | 00:00:11 51351 | 00:01:46 ``` ```SQL SELECT (state_agg(ts, state) -> into_values()).* FROM states_test ORDER BY state; ``` ```output state | duration -------+---------- ERROR | 00:00:03 OK | 00:01:46 START | 00:00:11 STOP | 00:00:00 ``` ### state_timeline ```SQL SELECT (state_agg(ts, state) -> state_timeline()).* FROM states_test; ``` ```output state | start_time | end_time -------+------------------------+------------------------ START | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 
2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT state, start_time, end_time FROM state_timeline( (SELECT state_agg(ts, state) FROM states_test)) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- START | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT state, start_time, end_time FROM state_int_timeline( (SELECT state_agg(ts, state) FROM states_test_4)) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- 4 | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:11+00 51351 | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2 | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 51351 | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 -9 | 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT state, start_time, end_time FROM state_timeline( (SELECT state_agg(ts, state) FROM states_test_2)) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- START | 2019-12-31 00:00:00+00 | 2019-12-31 00:00:11+00 OK | 2019-12-31 00:00:11+00 | 2019-12-31 00:02:00+00 STOP | 2019-12-31 00:02:00+00 | 2019-12-31 00:02:00+00 ``` ### state_at ```SQL SELECT state_at( (SELECT state_agg(ts, state) FROM states_test), '2020-01-01 00:01:02+00' ); ``` ```output state_at ---------- ERROR ``` ```SQL SELECT state_at_int( (SELECT state_agg(ts, state) FROM states_test_5), '2020-01-01 00:01:02+00' ); ``` ```output state_at ---------- 2 ``` ```SQL SELECT state_at( (SELECT state_agg(ts, state) FROM 
states_test), '2020-01-01 00:01:00+00' ); ``` ```output state_at ---------- ERROR ``` ```SQL SELECT state_at( (SELECT state_agg(ts, state) FROM states_test), '2020-01-01 00:00:05+00' ); ``` ```output state_at ---------- START ``` ```SQL SELECT state_at( (SELECT state_agg(ts, state) FROM states_test), '2020-01-01 00:00:00+00' ); ``` ```output state_at ---------- START ``` ```SQL SELECT state_at( (SELECT state_agg(ts, state) FROM states_test), '2019-12-31 23:59:59.999999+00' ); ``` ```output state_at ---------- ``` ```SQL SELECT state_at( (SELECT state_agg(ts, state) FROM states_test), '2025-01-01 00:00:00+00' ); ``` ```output state_at ---------- STOP ``` ```SQL SELECT (SELECT state_agg(ts, state) FROM states_test) -> state_at('2025-01-01 00:00:00+00'); ``` ```output ?column? ---------- STOP ``` ## state_periods ```SQL SELECT start_time, end_time FROM state_periods( (SELECT state_agg(ts, state) FROM states_test), 'OK' ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT ((SELECT state_agg(ts, state) FROM states_test) -> state_periods('OK')).*; ``` ```output start_time | end_time ------------------------+------------------------ 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT start_time, end_time FROM state_periods( (SELECT state_agg(ts, state) FROM states_test_4), 51351 ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT start_time, end_time FROM state_periods( (SELECT state_agg(ts, state) FROM states_test), 'ANYTHING' ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- ``` ## interpolated_state_timeline ```SQL SELECT 
state, start_time, end_time FROM interpolated_state_timeline( (SELECT state_agg(ts, state) FROM states_test), '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_3) ) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- START | 2019-12-31 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT ((SELECT state_agg(ts, state) FROM states_test) -> interpolated_state_timeline( '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_3) )).* ORDER BY start_time; ``` ```output state | start_time | end_time -------+------------------------+------------------------ START | 2019-12-31 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT state, start_time, end_time FROM interpolated_state_int_timeline( (SELECT state_agg(ts, state) FROM states_test_5), '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_6) ) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- 4 | 2019-12-31 00:00:00+00 | 2020-01-01 00:00:11+00 51351 | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2 | 2020-01-01 00:01:00+00 | 2020-01-01 00:02:03+00 51351 | 2020-01-01 00:02:03+00 | 2020-01-01 00:02:05+00 -9 | 2020-01-01 00:02:05+00 | 2020-01-01 00:02:05+00 ``` ```SQL SELECT state, start_time, end_time FROM interpolated_state_timeline( (SELECT state_agg(ts, state) FROM states_test), '2019-12-31', '5 days', (SELECT state_agg(ts, state) FROM states_test_3) ) ORDER BY start_time; ``` ```output state | 
start_time | end_time ------+------------------------+----------------------- START | 2019-12-31 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-05 00:00:00+00 ``` ```SQL SELECT state, start_time, end_time FROM interpolated_state_timeline( (SELECT state_agg(ts, state) FROM states_test), '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_2) ) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- STOP | 2019-12-31 00:00:00+00 | 2020-01-01 00:00:00+00 START | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT state, start_time, end_time FROM interpolated_state_timeline( (SELECT state_agg(ts, state) FROM states_test), '2019-12-31', '5 days', (SELECT state_agg(ts, state) FROM states_test_2) ) ORDER BY start_time; ``` ```output state | start_time | end_time ------+------------------------+----------------------- STOP | 2019-12-31 00:00:00+00 | 2020-01-01 00:00:00+00 START | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 2020-01-05 00:00:00+00 ``` ```SQL SELECT (state_agg(ts, state) -> state_periods('OK')).* FROM states_test; ``` ```output start_time | end_time ------------------------+------------------------ 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 ``` ## interpolated_state_periods ```SQL SELECT start_time, end_time FROM 
interpolated_state_periods( (SELECT state_agg(ts, state) FROM states_test), 'OK', '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_3) ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT ((SELECT state_agg(ts, state) FROM states_test) -> interpolated_state_periods( 'OK', '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_3) )).* ORDER BY start_time; ``` ```output start_time | end_time ------------------------+------------------------ 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT start_time, end_time FROM interpolated_state_periods( (SELECT state_agg(ts, state) FROM states_test), 'START', '2019-12-31', '5 days', (SELECT state_agg(ts, state) FROM states_test_3) ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2019-12-31 00:00:00+00 | 2020-01-01 00:00:11+00 ``` ```SQL SELECT start_time, end_time FROM interpolated_state_periods( (SELECT state_agg(ts, state) FROM states_test_5), 4, '2019-12-31', '5 days', (SELECT state_agg(ts, state) FROM states_test_6) ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2019-12-31 00:00:00+00 | 2020-01-01 00:00:11+00 ``` ```SQL SELECT start_time, end_time FROM interpolated_state_periods( (SELECT state_agg(ts, state) FROM states_test), 'STOP', '2019-12-31', '1 days', (SELECT state_agg(ts, state) FROM states_test_2) ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2019-12-31 00:00:00+00 | 2020-01-01 00:00:00+00 2020-01-01 00:02:00+00 | 2020-01-01 00:02:00+00 ``` ```SQL SELECT start_time, end_time FROM interpolated_state_periods( (SELECT state_agg(ts, state) FROM states_test), 'STOP', '2019-12-31', 
'5 days', (SELECT state_agg(ts, state) FROM states_test_2) ) ORDER BY start_time; ``` ```output start_time | end_time -----------------------+----------------------- 2019-12-31 00:00:00+00 | 2020-01-01 00:00:00+00 2020-01-01 00:02:00+00 | 2020-01-05 00:00:00+00 ``` ## rollup ```SQL WITH buckets AS (SELECT date_trunc('minute', ts) as dt, toolkit_experimental.compact_state_agg(ts, state) AS sa FROM states_test GROUP BY date_trunc('minute', ts)) SELECT toolkit_experimental.duration_in( toolkit_experimental.rollup(buckets.sa), 'START' ) FROM buckets; ``` ```output interval ---------- 00:00:11 ``` ```SQL WITH buckets AS (SELECT date_trunc('minute', ts) as dt, toolkit_experimental.compact_state_agg(ts, state) AS sa FROM states_test GROUP BY date_trunc('minute', ts)) SELECT toolkit_experimental.duration_in( toolkit_experimental.rollup(buckets.sa), 'OK' ) FROM buckets; ``` ```output interval ---------- 00:01:46 ``` ```SQL WITH buckets AS (SELECT date_trunc('minute', ts) as dt, state_agg(ts, state) AS sa FROM states_test GROUP BY date_trunc('minute', ts)) SELECT state_timeline( rollup(buckets.sa) ) FROM buckets; ``` ```output state_timeline ----------------------------------------------------------- (START,"2020-01-01 00:00:00+00","2020-01-01 00:00:11+00") (OK,"2020-01-01 00:00:11+00","2020-01-01 00:01:00+00") (ERROR,"2020-01-01 00:01:00+00","2020-01-01 00:01:03+00") (OK,"2020-01-01 00:01:03+00","2020-01-01 00:02:00+00") (STOP,"2020-01-01 00:02:00+00","2020-01-01 00:02:00+00") ``` ```SQL WITH buckets AS (SELECT date_trunc('minute', ts) as dt, state_agg(ts, state) AS sa FROM states_test GROUP BY date_trunc('minute', ts) HAVING date_trunc('minute', ts) != '2020-01-01 00:01:00+00'::timestamptz) SELECT state_timeline( rollup(buckets.sa) ) FROM buckets; ``` ```output state_timeline ----------------------------------------------------------- (START,"2020-01-01 00:00:00+00","2020-01-01 00:00:11+00") (OK,"2020-01-01 00:00:11+00","2020-01-01 00:02:00+00") (STOP,"2020-01-01 
00:02:00+00","2020-01-01 00:02:00+00") ``` ```SQL WITH buckets AS (SELECT date_trunc('minute', ts) as dt, state_agg(ts, state) AS sa FROM states_test_5 GROUP BY date_trunc('minute', ts) HAVING date_trunc('minute', ts) != '2020-01-01 00:01:00+00'::timestamptz) SELECT state_int_timeline( rollup(buckets.sa) ) FROM buckets; ``` ```output state_timeline ----------------------------------------------------------- (4,"2020-01-01 00:00:00+00","2020-01-01 00:00:11+00") (51351,"2020-01-01 00:00:11+00","2020-01-01 00:02:05+00") (-9,"2020-01-01 00:02:05+00","2020-01-01 00:02:05+00") ``` ## With continuous aggregate ```SQL ,non-transactional,ignore-output CREATE TABLE email_status ( id BIGINT, ts TIMESTAMPTZ, status TEXT ); SELECT create_hypertable('email_status','ts'); INSERT INTO email_status("ts", "id", "status") VALUES ('2022-01-11 11:51:12',1,'draft'), ('2022-01-11 11:53:23',1,'queued'), ('2022-01-11 11:57:46',1,'sending'), ('2022-01-11 11:57:50',1,'sent'), ('2022-01-11 11:52:12',2,'draft'), ('2022-01-11 11:58:23',2,'queued'), ('2022-01-11 12:00:46',2,'sending'), ('2022-01-11 12:01:03',2,'bounced'); ``` ```SQL ,non-transactional,ignore-output CREATE MATERIALIZED VIEW sa WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('1 minute'::interval, ts) AS bucket, id, state_agg(ts, status) AS agg FROM email_status GROUP BY bucket, id; ``` ```SQL SELECT rollup(agg) -> duration_in('draft') FROM sa WHERE id = 1; ``` ```output ?column? 
---------- 00:02:11 ``` ```SQL SELECT (state_timeline(rollup(agg))).* FROM sa WHERE id = 2; ``` ```output state | start_time | end_time ---------+------------------------+------------------------ draft | 2022-01-11 11:52:12+00 | 2022-01-11 11:58:23+00 queued | 2022-01-11 11:58:23+00 | 2022-01-11 12:00:46+00 sending | 2022-01-11 12:00:46+00 | 2022-01-11 12:01:03+00 bounced | 2022-01-11 12:01:03+00 | 2022-01-11 12:01:03+00 ``` ================================================ FILE: docs/stats_agg.md ================================================ # Statistical Aggregates ## Common 1-D Statistical Functions - `average` - `sum` - `num_vals` - `stddev`(population and sample) - `variance` (population and sample ) - `skewness` - `kurtosis` ## 2-D Statistical Regression Functions - `slope` - `intercept` - `x_intercept` - `corr` (correlation coefficient) - `covariance` (population and sample) - `skewness` - `kurtosis` - `determination_coeff` In order to make common statistical aggregates easier to work with in window functions and continuous aggregates, Toolkit provides common statistical aggregates in a slightly different form than otherwise available in PostgreSQL/TimescaleDB. They are re-implemented within the [two-step aggregates framework](docs/two-step_aggregation.md)which exposes a summary form to the user which can then have multiple accessors. 
```SQL, non-transactional CREATE TABLE foo ( t timestamptz, x DOUBLE PRECISION, y DOUBLE PRECISION ); ``` In order to run any of these statistical functions you must first perform the `stats_agg` aggregate with either one or two variables, following the general SQL framework for these things, when being used for statistical regression with two dimensions, the dependent variable comes first and the independent variable second, ie: ```SQL, ignore-output SELECT stats_agg(y, x) FROM foo; ``` As with other aggregates in the Toolkit, you can use any of the accessors on the results of the aggregation, so: ```SQL, ignore-output SELECT average( stats_agg(x) ) FROM foo; ``` will give you the average of column `x`. While this is slightly more complex for the simple case, many of the results of these aggregates are not combinable in their final forms, the output of the `stats_agg` aggregate is combinable, which means we can do tumbling window aggregates with them and re-combine them when they are used in continuous aggregates. In the 2-D case, you can access single variable statistics by calling the function with `_x` or `_y` like so: ```SQL, ignore-output SELECT average_x( stats_agg(y, x) ) FROM foo; ``` Statistics involving both variables (the ones only available in the 2-D case) are called normally: ```SQL, ignore-output SELECT slope( stats_agg(y, x) ) FROM foo; ``` For those statistics which have variants for either the sample or population we have made these accessible via a separate variable ie: ```SQL, ignore-output SELECT covariance( stats_agg(y, x), 'population' ) FROM foo; ``` The default for all of these is 'population' (the abbreviations 'pop' and 'samp' are also acceptable). The default means the function may also be called without the second argument, like so: ```SQL, ignore-output SELECT covariance( stats_agg(y, x) ) FROM foo; ``` Which will still return the population covariance. 
This is a minimum working version of the documentation for now, another working document can be found [here](docs/rolling_average_api_working.md), which goes into the window function usecase and some of the reasoning behind our naming decisions. Please feel free to open issues or discussions if you have questions or comments on the current API. We will further develop the documentation as we stabilize these functions over the coming releases. ================================================ FILE: docs/tdigest.md ================================================ # T-Digest > [Description](#tdigest-description)
> [Details](#tdigest-details)
> [Example](#tdigest-example)
> [Continuous Aggregate Example](#tdigest-cagg-example)
> [API](#tdigest-api) ## Description TimescaleDB Toolkit provides an implementation of the [t-digest data structure](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) for quantile approximations. A t-digest is a space efficient aggregation which provides increased resolution at the edges of the distribution. This allows for more accurate estimates of extreme quantiles than traditional methods. ## Details Timescale's t-digest is implemented as an aggregate function in PostgreSQL. They do not support moving-aggregate mode, and are not ordered-set aggregates. Presently they are restricted to float values, but the goal is to make them polymorphic. They are partializable and are good candidates for [continuous aggregation](https://docs.timescale.com/use-timescale/latest/continuous-aggregates/). One additional thing to note about TDigests is that they are somewhat dependent on the order of inputs. The percentile approximations should be nearly equal for the same underlying data, especially at the extremes of the quantile range where the TDigest is inherently more accurate, they are unlikely to be identical if built in a different order. While this should have little effect on the accuracy of the estimates, it is worth noting that repeating the creation of the TDigest might have subtle differences if the call is being parallelized by Postgres. Similarly, building a TDigest by combining several subdigests using the [summary aggregate](#tdigest-summary) is likely to produce a subtley different result than combining all of the underlying data using a single [point aggregate](#tdigest). ## Usage Example For this example we're going to start with a table containing some NOAA weather data for a few weather stations across the US over the past 20 years. 
```SQL ,ignore \d weather; ``` ``` Table "public.weather" Column | Type | Collation | Nullable | Default ---------+-----------------------------+-----------+----------+--------- station | text | | | name | text | | | date | timestamp without time zone | | | prcp | double precision | | | snow | double precision | | | tavg | double precision | | | tmax | double precision | | | tmin | double precision | | | ``` Now let's create some t-digests for our different stations and verify that they're receiving data. ```SQL ,ignore CREATE VIEW high_temp AS SELECT name, tdigest(100, tmax) FROM weather GROUP BY name; SELECT name, num_vals(tdigest) FROM high_temp; ``` ``` name | num_vals ---------------------------------------+----------- PORTLAND INTERNATIONAL AIRPORT, OR US | 7671 LITCHFIELD PARK, AZ US | 5881 NY CITY CENTRAL PARK, NY US | 7671 MIAMI INTERNATIONAL AIRPORT, FL US | 7671 (4 rows) ``` We can then check to see the 99.5 percentile high temperature for each location. ```SQL ,ignore SELECT name, approx_percentile(0.995, tdigest) FROM high_temp; ``` ``` name | quantile ---------------------------------------+-------------------- PORTLAND INTERNATIONAL AIRPORT, OR US | 98.4390837104072 LITCHFIELD PARK, AZ US | 114.97809722222223 NY CITY CENTRAL PARK, NY US | 95.86391321044545 MIAMI INTERNATIONAL AIRPORT, FL US | 95.04283854166665 (4 rows) ``` Or even check to see what quantile 90F would fall at in each city. 
```SQL ,ignore SELECT name, approx_percentile_rank(90.0, tdigest) FROM high_temp; ``` ``` name | approx_percentile_rank ---------------------------------------+-------------------- PORTLAND INTERNATIONAL AIRPORT, OR US | 0.9609990016734108 LITCHFIELD PARK, AZ US | 0.5531621580122781 NY CITY CENTRAL PARK, NY US | 0.9657150306348585 MIAMI INTERNATIONAL AIRPORT, FL US | 0.8093468908877591 (4 rows) ``` ## Example Using TimeScale Continuous Aggregates (tdigest-cagg-example) Timescale [continuous aggregates](https://docs.timescale.com/use-timescale/latest/continuous-aggregates/) provide an easy way to keep a tdigest up to date as more data is added to a table. The following example shows how this might look in practice. The first step is to create a Timescale hypertable to store our data. ```SQL ,non-transactional,ignore-output SET TIME ZONE 'UTC'; CREATE TABLE test(time TIMESTAMPTZ, value DOUBLE PRECISION); SELECT create_hypertable('test', 'time'); ``` Next a materialized view with the timescaledb.continuous property is added. This will automatically keep itself, including the tdigest in this case, up to date as data is added to the table. ```SQL ,non-transactional,ignore-output CREATE MATERIALIZED VIEW weekly_sketch WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('7 day'::interval, time) as week, tdigest(100, value) as digest FROM test GROUP BY time_bucket('7 day'::interval, time); ``` Next a utility function, `generate_periodic_normal_series`, is called to generate some data. When called in this manner the function will return 28 days worth of data points spaced 10 minutes apart. These points are generate by adding a random point (with a normal distribution and standard deviation of 100) to a sine wave which oscilates between 900 and 1100 over the period of a day. 
```SQL ,non-transactional INSERT INTO test SELECT time, value FROM toolkit_experimental.generate_periodic_normal_series('2020-01-01 UTC'::timestamptz, rng_seed => 543643); ``` ``` INSERT 0 4032 ``` Finally, a query is run over the aggregate to see various approximate percentiles from different weeks. ```SQL,ignore SELECT week, approx_percentile(0.01, digest) AS low, approx_percentile(0.5, digest) AS mid, approx_percentile(0.99, digest) AS high FROM weekly_sketch ORDER BY week; ``` ```ignore week | low | mid | high -----------------------+-------------------+--------------------+-------------------- 2019-12-30 00:00:00+00 | 783.2075197029583 | 1030.4505832620227 | 1276.7865808567146 2020-01-06 00:00:00+00 | 865.2941219994462 | 1096.0356855737048 | 1331.649176312383 2020-01-13 00:00:00+00 | 834.6747915021757 | 1060.024660266383 | 1286.1810386717 2020-01-20 00:00:00+00 | 728.2421431793433 | 955.3913494459423 | 1203.730690023456 2020-01-27 00:00:00+00 | 655.1143367116582 | 903.4836014674186 | 1167.7058289748031 ``` It is also possible to combine the weekly aggregates to run queries on the entire data: ```SQL,ignore SELECT approx_percentile(0.01, combined.digest) AS low, approx_percentile(0.5, combined.digest) AS mid, approx_percentile(0.99, combined.digest) AS high FROM (SELECT rollup(digest) AS digest FROM weekly_sketch) AS combined; ``` ```ignore low | mid | high ------------------+--------------------+-------------------- 746.7844638729881 | 1026.6100299252928 | 1294.5391132795592 ``` ## Command List (A-Z) Aggregate Functions > - [tdigest (point form)](#tdigest) > - [rollup (summary form)](#tdigest-summary) Accessor Functions > - [approx_percentile](#tdigest_quantile) > - [approx_percentile_rank](#tdigest_quantile_at_value) > - [max_val](#tdigest_max) > - [mean](#tdigest_mean) > - [min_val](#tdigest_min) > - [num_vals](#tdigest_count) --- ## **tdigest (point form)** ```SQL ,ignore tdigest( buckets INTEGER, value DOUBLE PRECISION ) RETURNS TDigest ``` This will 
construct and return a TDigest with the specified number of buckets over the given values. ### Required Arguments |Name| Type |Description| |---|---|---| | `buckets` | `INTEGER` | Number of buckets in the digest. Increasing this will provide more accurate quantile estimates, but will require more memory.| | `value` | `DOUBLE PRECISION` | Column to aggregate.
### Returns |Column|Type|Description| |---|---|---| | `tdigest` | `TDigest` | A t-digest object which may be passed to other t-digest APIs. |
### Sample Usages For this example, assume we have a table 'samples' with a column 'weights' holding `DOUBLE PRECISION` values. The following will simply return a digest over that column ```SQL ,ignore SELECT tdigest(100, data) FROM samples; ``` It may be more useful to build a view from the aggregate that can later be passed to other tdigest functions. ```SQL ,ignore CREATE VIEW digest AS SELECT tdigest(100, data) FROM samples; ``` --- ## **rollup (summary form)** ```SQL ,ignore rollup( digest TDigest ) RETURNS TDigest ``` This will combine multiple already constructed TDigests, if they were created with the same size. This is very useful for re-aggregating digests already constructed using the [point form](#tdigest). Note that the resulting digest may be subtly different from a digest constructed directly from the underlying points, as noted in the [details section](#tdigest-details) above. ### Required Arguments |Name| Type |Description| |---|---|---| | `digest` | `TDigest` | Previously constructed TDigest objects. |
### Returns |Column|Type|Description| |---|---|---| | `tdigest` | `TDigest` | A TDigest representing all of the underlying data from all the subaggregates. |
### Sample Usages This example assumes a table 'samples' with a column 'data' holding `DOUBLE PRECISION` values and an 'id' column that holds the what series the data belongs to. A view to get the TDigests for each `id` using the [point form](#tdigest-point) can be created like so: ```SQL ,ignore CREATE VIEW digests AS SELECT id, rollup(100, data) as digest FROM samples GROUP BY id; ``` That view can then be used to get the full aggregate like so: ```SQL ,ignore SELECT rollup(digest) FROM digests; ``` --- ## **approx_percentile** ```SQL ,ignore approx_percentile( quantile DOUBLE PRECISION, digest TDigest ) RETURNS TDigest ``` Get the approximate value at a quantile from a t-digest ### Required Arguments |Name|Type|Description| |---|---|---| | `quantile` | `DOUBLE PRECISION` | The desired quantile (0.0-1.0) to approximate. | | `digest` | `TDigest` | The digest to compute the quantile on. |
### Returns |Column|Type|Description| |---|---|---| | `approx_percentile` | `DOUBLE PRECISION` | The estimated value at the requested quantile. |
### Sample Usage ```SQL SELECT approx_percentile(0.90, tdigest(100, data)) FROM generate_series(1, 100) data; ``` ```output approx_percentile ---------- 90.5 ``` --- ## **approx_percentile_rank** ```SQL ,ignore approx_percentile_rank( value DOUBLE PRECISION, digest TDigest ) RETURNS TDigest ``` Estimate what quantile a given value would be located at in a t-digest. ### Required Arguments |Name|Type|Description| |---|---|---| | `value` | `DOUBLE PRECISION` | The value to estimate the quantile of. | | `digest` | `TDigest` | The digest to compute the quantile on. |
### Returns |Column|Type|Description| |---|---|---| | `approx_percentile_rank` | `DOUBLE PRECISION` | The estimated quantile associated with the provided value. |
### Sample Usage ```SQL SELECT approx_percentile_rank(90, tdigest(100, data)) FROM generate_series(1, 100) data; ``` ```output approx_percentile_rank ------------------- 0.895 ``` ## **max_val** ```SQL ,ignore max_val(digest TDigest) RETURNS DOUBLE PRECISION ``` Get the maximum value from a t-digest. ### Required Arguments |Name|Type|Description| |---|---|---| | `digest` | `TDigest` | The digest to extract the max value from. |
### Returns |Column|Type|Description| |---|---|---| | `max_val` | `DOUBLE PRECISION` | The maximum value entered into the t-digest. |
### Sample Usage ```SQL SELECT max_val(tdigest(100, data)) FROM generate_series(1, 100) data; ``` ```output max_val --------- 100 ``` --- ## **mean** ```SQL ,ignore mean(digest TDigest) RETURNS DOUBLE PRECISION ``` Get the average of all the values contained in a t-digest. ### Required Arguments |Name|Type|Description| |---|---|---| | `digest` | `TDigest` | The digest to extract the mean value from. |
### Returns |Column|Type|Description| |---|---|---| | `mean` | `DOUBLE PRECISION` | The average of the values entered into the t-digest. |
### Sample Usage ```SQL SELECT mean(tdigest(100, data)) FROM generate_series(1, 100) data; ``` ```output mean ------ 50.5 ``` --- ## **min_val** ```SQL ,ignore min_val(digest TDigest) RETURNS DOUBLE PRECISION ``` Get the minimum value from a t-digest. ### Required Arguments |Name|Type|Description| |---|---|---| | `digest` | `TDigest` | The digest to extract the min value from. |
### Returns |Column|Type|Description| |---|---|---| | `min_val` | `DOUBLE PRECISION` | The minimum value entered into the t-digest. |
### Sample Usages ```SQL SELECT min_val(tdigest(100, data)) FROM generate_series(1, 100) data; ``` ```output min_val ----------- 1 ``` --- ## **num_vals** ```SQL ,ignore num_vals(digest TDigest) RETURNS DOUBLE PRECISION ``` Get the number of values contained in a t-digest. ### Required Arguments |Name|Type|Description| |---|---|---| | `digest` | `TDigest` | The digest to extract the number of values from. |
### Returns |Column|Type|Description| |---|---|---| | `num_vals` | `DOUBLE PRECISION` | The number of values entered into the t-digest. |
### Sample Usage ```SQL SELECT num_vals(tdigest(100, data)) FROM generate_series(1, 100) data; ``` ```output num_vals ----------- 100 ``` --- ================================================ FILE: docs/template.md ================================================ # FEATURE-NAME [experimental](/docs/README.md#tag-notes) - Current status: ( prototype | experimental | stabilizing | stable ) - Effort remaining: ( little | some | lots ) This is a living document. ## Purpose - How will this be used? - What problem is the user trying to solve? - What kind of SQL are they going to write? - Is there pure SQL query we are simplifying? ## Use cases - e.g. single groupings and multiple groupings (not just on `"time"`) ### Test Data Examples below are tested against the following data: ```SQL ,non-transactional SET TIME ZONE 'UTC'; CREATE TABLE example(time TIMESTAMPTZ, value DOUBLE PRECISION); ``` TODO It would be nice not to have to front-load this. It shouldn't be too hard to mark prereq blocks as such so update-tester can find it and run those blocks first. ### simple use case ```SQL ``` ```output ``` ### complex use cases ### edge cases ## Common functionality For aggregates, list our common function overloads here and how this aggregate implements them, or why it doesn't. ### rollup ### into_values / unnest Is there a need to return a set from the aggregate? ## Implementation plan ### Current status ### Next steps First step is a simple use case in `toolkit_experimental`. Other steps may include: - expanded functionality - adjusting based on user feedback - optimization And finally: stabilization or removal. ## Performance (aspirational) notes on expectations, current status, future goals TODO we'll need to document our approach to benchmarking first talk to other groups (who? query experience?) For example if there's a pure SQL way to accomplish a goal and we're just offering an improvement, we ought to measure both and show the results. 
## Alternatives Be sure to list alternatives considered and how we chose this approach. ```SQL ,ignore [SQL that doesn't work because we didn't implement it] ``` ================================================ FILE: docs/test_caggs.md ================================================ # Continuous aggregation tests This document serves as a driver for allowing our doctester to verify the behavior of some of our features on continuous aggregates. It is not intended to serve as documentation, though it does present an example of using continuous aggregates with some toolkit code. We're also going to adjust extra_float_digits to use 12 significant digits. This prevents some spurious failures in the skewness and kurtosis accessors used below. ```SQL ,non-transactional,ignore-output SET extra_float_digits = -3; ``` ## Setup table ```SQL ,non-transactional,ignore-output SET TIME ZONE 'UTC'; CREATE TABLE test(time TIMESTAMPTZ, value1 DOUBLE PRECISION, value2 DOUBLE PRECISION); SELECT create_hypertable('test', 'time'); ``` ## Setup continuous aggs ```SQL ,non-transactional,ignore-output CREATE MATERIALIZED VIEW weekly_aggs WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('7 day'::interval, time) as week, hyperloglog(64, value1) as hll, counter_agg(time, value1) as counter, stats_agg(value1, value2) as stats, timevector(time, value2) as tvec, heartbeat_agg(time, time_bucket('7 day'::interval, time), '1w', '55m') as hb FROM test GROUP BY time_bucket('7 day'::interval, time); ``` ## Populate table ```SQL ,non-transactional,ignore-output INSERT INTO test SELECT '2020-01-01'::TIMESTAMPTZ + '1 hour'::INTERVAL * row_number() OVER (), v.b, v.b::DOUBLE PRECISION/v.a::DOUBLE PRECISION FROM (SELECT a, generate_series(a, 100) AS b FROM generate_series(1, 100) a) v; ``` ## Validate continuous aggs ```SQL SELECT week, distinct_count(hll), rate(counter), skewness_x(stats, 'population') FROM weekly_aggs WHERE week > '2020-06-01'::TIMESTAMPTZ ORDER BY 
week; ``` ```output week | distinct_count | rate | skewness_x ------------------------+----------------+-------------------+------------------- 2020-06-08 00:00:00+00 | 49 | 0.000627079174983 | 0.0970167274813 2020-06-15 00:00:00+00 | 45 | 0.00065369261477 | -0.0885157388226 2020-06-22 00:00:00+00 | 42 | 0.000680306054558 | 0.0864685035294 2020-06-29 00:00:00+00 | 36 | 0.000706919494345 | -0.0257336371983 2020-07-06 00:00:00+00 | 31 | 0.000971390552229 | 0.169001960922 2020-07-13 00:00:00+00 | 28 | 0.0011626746507 | 0.0432068720231 2020-07-20 00:00:00+00 | 22 | 0.00168330006653 | 0.344413728361 2020-07-27 00:00:00+00 | 10 | 0.00432471264368 | 0.624916113283 ``` ```SQL SELECT distinct_count(rollup(hll)), stderror(rollup(hll)) FROM weekly_aggs; ``` ```output distinct_count | stderror ----------------+---------- 115 | 0.13 ``` ```SQL SELECT num_resets(rollup(counter)) FROM weekly_aggs; ``` ```output num_resets ------------ 98 ``` ```SQL SELECT average_y(rollup(stats)), stddev_y(rollup(stats)), skewness_y(rollup(stats), 'population'), kurtosis_y(rollup(stats), 'population') FROM weekly_aggs; ``` ```output average_y | stddev_y | skewness_y | kurtosis_y -----------+---------------+-----------------+---------------- 67 | 23.6877840059 | -0.565748443434 | 2.39964349376 ``` ```SQL SELECT week, count(*) FROM ( SELECT week, unnest(tvec) FROM weekly_aggs WHERE week > '2020-06-01'::TIMESTAMPTZ ) s GROUP BY week ORDER BY week; ``` ```output week | count ------------------------+------- 2020-06-08 00:00:00+00 | 168 2020-06-15 00:00:00+00 | 168 2020-06-22 00:00:00+00 | 168 2020-06-29 00:00:00+00 | 168 2020-07-06 00:00:00+00 | 168 2020-07-13 00:00:00+00 | 168 2020-07-20 00:00:00+00 | 168 2020-07-27 00:00:00+00 | 59 ``` ```SQL SELECT week, uptime(hb), interpolated_uptime(hb, LAG(hb) OVER (ORDER BY week)) FROM weekly_aggs WHERE week > '2020-06-01' ORDER BY week; ``` ```output week | uptime | interpolated_uptime ------------------------+-----------------+--------------------- 
2020-06-08 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-06-15 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-06-22 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-06-29 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-07-06 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-07-13 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-07-20 00:00:00+00 | 6 days 10:00:00 | 6 days 10:00:00 2020-07-27 00:00:00+00 | 2 days 06:05:00 | 2 days 06:05:00 ``` ================================================ FILE: docs/test_candlestick_agg.md ================================================ # Candlestick Continuous Aggregation Tests ## Setup table ```SQL,non-transactional,ignore-output SET TIME ZONE 'UTC'; CREATE TABLE stocks_real_time ( time TIMESTAMPTZ NOT NULL, symbol TEXT NOT NULL, price DOUBLE PRECISION NULL, day_volume INT NULL ); SELECT create_hypertable('stocks_real_time','time'); ``` ## Setup Continuous Aggs ```SQL,non-transactional,ignore-output CREATE MATERIALIZED VIEW cs WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('1 minute'::interval, "time") AS ts, symbol, candlestick_agg("time", price, day_volume) AS candlestick FROM stocks_real_time GROUP BY ts, symbol; ``` ## Insert data into table ```SQL,non-transactional,ignore-output INSERT INTO stocks_real_time("time","symbol","price","day_volume") VALUES ('2023-01-11 18:59:59+00','AAPL',140,20), ('2023-01-11 18:23:58+00','AAPL',100,10), ('2023-01-11 17:59:57+00','AAPL',133.445,NULL), ('2023-01-11 17:59:55+00','PFE',47.38,2000), ('2023-01-11 12:15:55+00','PFE',1,23), ('2023-01-11 12:00:52+00','AAPL',29.82,NULL), ('2023-01-11 11:12:12+00','PFE',47.38,14), ('2023-01-11 11:01:50+00','AMZN',95.25,1000), ('2023-01-11 11:01:32+00','AMZN',92,NULL), ('2023-01-11 11:01:30+00','AMZN',75.225,NULL); ``` ## Query by-minute continuous aggregate over stock trade data for ohlc prices along with timestamps ```SQL,non-transactional SELECT ts, symbol, open_time(candlestick), 
open(candlestick), high_time(candlestick), high(candlestick), low_time(candlestick), low(candlestick), close_time(candlestick), close(candlestick), volume(candlestick) FROM cs; ``` ```output ts | symbol | open_time | open | high_time | high | low_time | low | close_time | close | volume ------------------------+--------+------------------------+---------+------------------------+---------+------------------------+---------+------------------------+---------+-------- 2023-01-11 12:15:00+00 | PFE | 2023-01-11 12:15:55+00 | 1 | 2023-01-11 12:15:55+00 | 1 | 2023-01-11 12:15:55+00 | 1 | 2023-01-11 12:15:55+00 | 1 | 23 2023-01-11 17:59:00+00 | PFE | 2023-01-11 17:59:55+00 | 47.38 | 2023-01-11 17:59:55+00 | 47.38 | 2023-01-11 17:59:55+00 | 47.38 | 2023-01-11 17:59:55+00 | 47.38 | 2000 2023-01-11 11:01:00+00 | AMZN | 2023-01-11 11:01:30+00 | 75.225 | 2023-01-11 11:01:50+00 | 95.25 | 2023-01-11 11:01:30+00 | 75.225 | 2023-01-11 11:01:50+00 | 95.25 | 2023-01-11 18:59:00+00 | AAPL | 2023-01-11 18:59:59+00 | 140 | 2023-01-11 18:59:59+00 | 140 | 2023-01-11 18:59:59+00 | 140 | 2023-01-11 18:59:59+00 | 140 | 20 2023-01-11 11:12:00+00 | PFE | 2023-01-11 11:12:12+00 | 47.38 | 2023-01-11 11:12:12+00 | 47.38 | 2023-01-11 11:12:12+00 | 47.38 | 2023-01-11 11:12:12+00 | 47.38 | 14 2023-01-11 17:59:00+00 | AAPL | 2023-01-11 17:59:57+00 | 133.445 | 2023-01-11 17:59:57+00 | 133.445 | 2023-01-11 17:59:57+00 | 133.445 | 2023-01-11 17:59:57+00 | 133.445 | 2023-01-11 18:23:00+00 | AAPL | 2023-01-11 18:23:58+00 | 100 | 2023-01-11 18:23:58+00 | 100 | 2023-01-11 18:23:58+00 | 100 | 2023-01-11 18:23:58+00 | 100 | 10 2023-01-11 12:00:00+00 | AAPL | 2023-01-11 12:00:52+00 | 29.82 | 2023-01-11 12:00:52+00 | 29.82 | 2023-01-11 12:00:52+00 | 29.82 | 2023-01-11 12:00:52+00 | 29.82 | ``` ================================================ FILE: docs/time_weighted_average.md ================================================ # Time Weighted Average > [Description](#time-weighted-average-description)
> [Example Usage](#time-weighted-average-examples)
> [API](#time-weighted-average-api)
> [Notes on Parallelism and Ordering](#time-weight-ordering)
> [Interpolation Methods Details](#time-weight-methods)
## Description Time weighted averages are commonly used in cases where a time series is not evenly sampled, so a traditional average will give misleading results. Consider a voltage sensor that sends readings once every 5 minutes or whenever the value changes by more than 1 V from the previous reading. If the results are generally stable, but with some quick moving transients, a simple average over all of the points will tend to over-weight the transients instead of the stable readings. A time weighted average weights each value by the duration over which it occurred based on the points around it and produces correct results for unevenly spaced series. TimescaleDB Toolkit's time weighted average is implemented as an aggregate which weights each value either using a last observation carried forward (LOCF) approach or a linear interpolation approach ([see interpolation methods](#time-weight-methods)). While the aggregate is not parallelizable, it is supported with [continuous aggregation](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates). Additionally, [see the notes on parallelism and ordering](#time-weight-ordering) for a deeper dive into considerations for use with parallelism and some discussion of the internal data structures. 
--- ## Example Usage For these examples we'll assume a table `foo` defined as follows, with a bit of example data: ```SQL ,non-transactional SET TIME ZONE 'UTC'; CREATE TABLE foo ( measure_id BIGINT, ts TIMESTAMPTZ , val DOUBLE PRECISION, PRIMARY KEY (measure_id, ts) ); INSERT INTO foo VALUES ( 1, '2020-01-01 00:00:00+00', 10.0), ( 1, '2020-01-01 00:01:00+00', 20.0), ( 1, '2020-01-01 00:02:00+00',10.0), ( 1, '2020-01-01 00:03:00+00', 20.0), ( 1, '2020-01-01 00:04:00+00', 15.0), ( 2, '2020-01-01 00:00:00+00', 10.0), ( 2, '2020-01-01 00:01:00+00', 20.0), ( 2, '2020-01-01 00:02:00+00',10.0), ( 2, '2020-01-01 00:03:00+00', 20.0), ( 2, '2020-01-01 00:04:00+00', 10.0), ( 2, '2020-01-01 00:08:00+00', 10.0), ( 2, '2020-01-01 00:10:00+00', 30.0), ( 2, '2020-01-01 00:10:30+00',10.0), ( 2, '2020-01-01 00:16:30+00', 35.0), ( 2, '2020-01-01 00:30:00+00', 60.0); ``` ```output INSERT 0 15 ``` Where the measure_id defines a series of related points. A simple use would be to calculate the time weighted average over the whole set of points for each `measure_id`. We'll use the LOCF method for weighting: ```SQL SELECT measure_id, average( time_weight('LOCF', ts, val) ) FROM foo GROUP BY measure_id ORDER BY measure_id; ``` ```output measure_id | average ------------+--------- 1 | 15 2 | 22.25 ``` (And of course a where clause can be used to limit the time period we are averaging, the measures we're using etc.). 
We can also use the [`time_bucket` function](https://docs.timescale.com/latest/api#time_bucket) to produce a series of averages in 5 minute buckets: ```SQL SELECT measure_id, time_bucket('5 min'::interval, ts) as bucket, average( time_weight('LOCF', ts, val) ) FROM foo GROUP BY measure_id, time_bucket('5 min'::interval, ts) ORDER BY measure_id, time_bucket('5 min'::interval, ts); ``` ```output measure_id | bucket | average ------------+------------------------+--------- 1 | 2020-01-01 00:00:00+00 | 15 2 | 2020-01-01 00:00:00+00 | 15 2 | 2020-01-01 00:05:00+00 | 2 | 2020-01-01 00:10:00+00 | 30 2 | 2020-01-01 00:15:00+00 | 2 | 2020-01-01 00:30:00+00 | ``` Note that in this case, there are several `time_buckets` that have only a single value; these return `NULL` as the average as we cannot take a time weighted average with only a single point in a bucket and no information about points outside the bucket. In many cases we'll have significantly more data here, but for the example we wanted to keep our data set small. Of course this might be more useful if we make a continuous aggregate out of it. We'll first have to make it a hypertable partitioned on the ts column, with a relatively large chunk_time_interval because the data isn't too high rate: ```SQL ,non-transactional,ignore-output SELECT create_hypertable('foo', 'ts', chunk_time_interval=> '15 days'::interval, migrate_data => true); ``` Now we can make our continuous aggregate: ```SQL ,non-transactional, ignore-output CREATE MATERIALIZED VIEW foo_5 WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT measure_id, time_bucket('5 min'::interval, ts) as bucket, time_weight('LOCF', ts, val) FROM foo GROUP BY measure_id, time_bucket('5 min'::interval, ts); ``` Note that here, we just use the `time_weight` function. 
It's often better to do that and simply run the `average` function when selecting from the view like so: ```SQL SELECT measure_id, bucket, average(time_weight) FROM foo_5 ORDER BY measure_id, bucket; ``` ```output measure_id | bucket | average ------------+------------------------+--------- 1 | 2020-01-01 00:00:00+00 | 15 2 | 2020-01-01 00:00:00+00 | 15 2 | 2020-01-01 00:05:00+00 | 2 | 2020-01-01 00:10:00+00 | 30 2 | 2020-01-01 00:15:00+00 | 2 | 2020-01-01 00:30:00+00 | ``` And we get the same results as before. It also allows us to re-aggregate from the continuous aggregate into a larger bucket size quite simply: ```SQL SELECT measure_id, time_bucket('1 day'::interval, bucket), average( rollup(time_weight) ) FROM foo_5 GROUP BY measure_id, time_bucket('1 day'::interval, bucket) ORDER BY measure_id, time_bucket('1 day'::interval, bucket); ``` ```output measure_id | time_bucket | average ------------+------------------------+--------- 1 | 2020-01-01 00:00:00+00 | 15 2 | 2020-01-01 00:00:00+00 | 22.25 ``` We can also use this to speed up our initial calculation where we're only grouping by measure_id and producing a full average (assuming we have a fair number of points per 5 minute period, here it's not going to do much because of our limited example data, but you get the gist): ```SQL SELECT measure_id, average( rollup(time_weight) ) FROM foo_5 GROUP BY measure_id ORDER BY measure_id; ``` ```output measure_id | average ------------+--------- 1 | 15 2 | 22.25 ``` --- ## Command List (A-Z) > - [time_weight() (point form)](#time_weight_point) > - [rollup() (summary form)](#time-weight-summary) > - [average()](#time-weight-average) --- ## **time_weight() (point form)** ```SQL ,ignore time_weight( method TEXT¹, ts TIMESTAMPTZ, value DOUBLE PRECISION ) RETURNS TimeWeightSummary ``` ¹ Only two values are currently supported, 'linear' and 'LOCF', any capitalization of these will be accepted. 
[See interpolation methods for more info.](#time-weight-methods) An aggregate that produces a `TimeWeightSummary` from timestamps and associated values. ### Required Arguments² |Name| Type |Description| |---|---|---| | `method` | `TEXT` | The weighting method we should use, options are 'linear' or 'LOCF', not case sensitive | | `ts` | `TIMESTAMPTZ` | The time at each point | | `value` | `DOUBLE PRECISION` | The value at each point to use for the time weighted average|
##### ² Note that `ts` and `value` can be `null`, however the aggregate is not evaluated on `null` values and will return `null`, but it will not error on `null` inputs. ### Returns |Column|Type|Description| |---|---|---| | `time_weight` | `TimeWeightSummary` | A TimeWeightSummary object that can be passed to other functions within the time weighting API. |
### Sample Usage ```SQL ,ignore-output WITH t as ( SELECT time_bucket('1 day'::interval, ts) as dt, time_weight('Linear', ts, val) AS tw -- get a time weight summary FROM foo WHERE measure_id = 10 GROUP BY time_bucket('1 day'::interval, ts) ) SELECT dt, average(tw) -- extract the average from the time weight summary FROM t; ``` ## **rollup() (summary form)** ```SQL ,ignore rollup( tws TimeWeightSummary ) RETURNS TimeWeightSummary ``` An aggregate to compute a combined `TimeWeightSummary` from a series of non-overlapping `TimeWeightSummaries`. Non-disjoint `TimeWeightSummaries` will cause errors. See [Notes on Parallelism and Ordering](#time-weight-ordering) for more information. ### Required Arguments² |Name| Type |Description| |---|---|---| | `tws` | `TimeWeightSummary` | The input TimeWeightSummary from a previous `time_weight` (point form) call, often from a [continuous aggregate](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates)| ### Returns |Column|Type|Description| |---|---|---| | `time_weight` | `TimeWeightSummary` | A TimeWeightSummary object that can be passed to other functions within the time weighting API. |
### Sample Usage ```SQL ,ignore-output WITH t as ( SELECT date_trunc('day', ts) as dt, time_weight('Linear', ts, val) AS tw -- get a time weight summary FROM foo WHERE measure_id = 10 GROUP BY date_trunc('day', ts) ), q as ( SELECT rollup(tw) AS full_tw -- do a second level of aggregation to get the full time weighted average FROM t ) SELECT dt, average(tw), -- extract the average from the time weight summary average(tw) / (SELECT average(full_tw) FROM q LIMIT 1) as normalized -- get the normalized average FROM t; ``` ## **average()** ```SQL ,ignore average( tws TimeWeightSummary ) RETURNS DOUBLE PRECISION ``` A function to compute a time weighted average from a `TimeWeightSummary`. ### Required Arguments |Name| Type |Description| |---|---|---| | `tws` | `TimeWeightSummary` | The input TimeWeightSummary from a `time_weight` call.| ### Returns |Column|Type|Description| |---|---|---| | `average` | `DOUBLE PRECISION` | The time weighted average computed from the `TimeWeightSummary`|
### Sample Usage ```SQL ,ignore SELECT id, average(tws) FROM ( SELECT id, time_weight('LOCF', ts, val) AS tws FROM foo GROUP BY id ) t ``` --- ## Notes on Parallelism and Ordering The time weighted average calculations we perform require a strict ordering of inputs and therefore the calculations are not parallelizable in the strict Postgres sense. This is because when Postgres does parallelism it hands out rows randomly, basically as it sees them to workers. However, if your parallelism can guarantee disjoint (in time) sets of rows, the algorithm can be parallelized, just so long as within some time range, all rows go to the same worker. This is the case for both [continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates) and for [distributed hypertables](https://docs.timescale.com/latest/using-timescaledb/distributed-hypertables) (as long as the partitioning keys are in the group by, though the aggregate itself doesn't horribly make sense otherwise). We throw an error if there is an attempt to combine overlapping `TimeWeightSummaries`, for instance, in our example above, if you were to try to combine summaries across `measure_id`s it would error. This is because the interpolation techniques really only make sense within a given time series determined by a single `measure_id`. 
However, given that the time weighted average produced is a dimensionless quantity, a simple average of time weighted averages should better represent the variation across devices, so the recommendation for things like baselines across many timevectors would be something like: ```SQL ,ignore-output WITH t as (SELECT measure_id, average( time_weight('LOCF', ts, val) ) as time_weighted_average FROM foo GROUP BY measure_id) SELECT avg(time_weighted_average) -- use the normal avg function to average our time weighted averages FROM t; ``` Internally, the first and last points seen as well as the calculated weighted sum are stored in each `TimeWeightSummary` and used to combine with a neighboring `TimeWeightSummary` when re-aggregation or the Postgres `combine function` is called. In general, the functions support [partial aggregation](https://www.postgresql.org/docs/current/xaggr.html#XAGGR-PARTIAL-AGGREGATES) and partitionwise aggregation in the multinode context, but are not parallelizable (in the Postgres sense, which requires them to accept potentially overlapping input). Because they require ordered sets, the aggregates build up a buffer of input data, sort it and then perform the proper aggregation steps. In cases where memory is proving to be too small to build up a buffer of points causing OOMs or other issues, a multi-level aggregate can be useful. 
Following our example from above: ```SQL ,ignore-output WITH t as (SELECT measure_id, time_bucket('1 day'::interval, ts), time_weight('LOCF', ts, val) FROM foo GROUP BY measure_id, time_bucket('1 day'::interval, ts) ) SELECT measure_id, average( rollup(time_weight) ) FROM t GROUP BY measure_id; ``` Moving aggregate mode is not supported by `time_weight` and its use as a window function may be quite inefficient, but it is possible to do so as in: ```SQL ,ignore-output SELECT measure_id, average( time_weight('LOCF', ts, val) OVER (PARTITION BY measure_id ORDER BY ts RANGE '15 minutes'::interval PRECEDING) ) FROM foo; ``` Which will give you the 15 minute rolling time weighted average for each point. --- ## Interpolation Methods Details Discrete time values don't always allow for an obvious calculation of the time weighted average. In order to calculate a time weighted average we need to choose how to weight each value. The two methods we currently use are last observation carried forward (LOCF) and linear interpolation. In the LOCF approach, the value is treated as if it remains constant until the next value is seen. The LOCF approach is commonly used when the sensor or measurement device sends measurements only when there is a change in value. The linear interpolation approach treats the values between any two measurements as if they lie on the line connecting the two measurements. The linear interpolation approach is used to account for irregularly sampled data where the sensor doesn't provide any guarantees about the spacing between readings. Essentially, internally, the time weighted average computes a numerical approximation of the integral of the theoretical full time curve based on the discrete sampled points provided. We call this the weighted sum. 
For LOCF, the weighted sum will be equivalent to the area under a stepped curve: ``` | (pt 4) | (pt 2) * | *------- | | | | | |(pt 1) | *------ | *--------- (pt 3) | | | | |__|_______________________|______ time ``` The linear interpolation is similar, except here it is more of a sawtooth curve. (And the points are different due to the limitations of the slopes of lines one can "draw" using ASCII art). ``` | (pt 4) | * | (pt 2) / | | * / | | / \ / | |(pt 1) / * | | * (pt 3) | | | | |______|_________________|____________ time ``` Here this ends up being equal to the rectangle with width equal to the duration between two points and height the midpoint between the two magnitudes. Once we have this weighted sum, we can divide by the total duration to get the time weighted average. ================================================ FILE: docs/timeseries.md ================================================ # Timevector > [Description](#timevector-description)
> [Timevector Pipelines](#timevector-pipelines)
> [Example](#timevector-example)
> [API](#timevector-api) ## Description A timevector is an intermediate representation of a particular value over time used by the extension. It is a space efficient representation used to store the result of analytic functions such as [asap_smooth](asap.md#asap_smooth) or [lttb](lttb.md#lttb). Data can also be directly aggregated into a timevector and passed to functions which support this representation. The [unnest](#timevector_unnest) API can be used to get the data back from a timevector. ## Timevector Pipelines In an attempt to streamline the timevector interface and make them as easy to use as possible, we've provided a custom operator `->` for applying common operations to timevector and chaining such operations together. This is much more fully documented in the [timevector pipeline elements](timevector_pipeline_elements.md) page. ## Usage Example For this example, let's start with a table containing some random test data. ```SQL ,non-transactional,ignore-output SET TIME ZONE 'UTC'; CREATE TABLE test(time TIMESTAMPTZ, value DOUBLE PRECISION); ``` ```SQL ,non-transactional INSERT INTO test SELECT time, value FROM toolkit_experimental.generate_periodic_normal_series('2020-01-01 UTC'::timestamptz, rng_seed => 11111); ``` ```output INSERT 0 4032 ``` Now let's capture this data into a time series which we'll store in a view. ```SQL ,non-transactional,ignore-output CREATE VIEW series AS SELECT timevector(time, value) FROM test; ``` We can now use this timevector to efficiently move the data around to other functions. 
```SQL SELECT time, value::numeric(10,2) FROM unnest((SELECT lttb(timevector, 20) FROM series)); ``` ```output time | value ------------------------+-------------------- 2020-01-01 00:00:00+00 | 1038.44 2020-01-02 04:20:00+00 | 1325.44 2020-01-03 14:00:00+00 | 708.82 2020-01-04 18:30:00+00 | 1328.28 2020-01-05 16:40:00+00 | 802.20 2020-01-07 06:00:00+00 | 1298.02 2020-01-09 11:20:00+00 | 741.08 2020-01-10 18:40:00+00 | 1357.05 2020-01-13 08:30:00+00 | 780.32 2020-01-14 03:40:00+00 | 1408.34 2020-01-15 01:50:00+00 | 895.15 2020-01-16 20:30:00+00 | 1335.22 2020-01-18 07:20:00+00 | 823.08 2020-01-19 18:10:00+00 | 1245.79 2020-01-21 10:00:00+00 | 666.48 2020-01-22 23:10:00+00 | 1182.87 2020-01-24 09:00:00+00 | 736.47 2020-01-26 05:20:00+00 | 1197.26 2020-01-28 08:10:00+00 | 659.63 2020-01-28 23:50:00+00 | 956.29 ``` ## Command List (A-Z) Aggregate Functions > - [timevector (point form)](#timevector) > - [rollup (summary form)](#timevector-summary) Accessor Functions > - [unnest](#timevector_unnest) --- ## **timevector (point form)** ```SQL ,ignore timevector( time TIMESTAMPTZ, value DOUBLE PRECISION ) RETURNS Timevector ``` This will construct and return timevector object containing the passed in time, value pairs. ### Required Arguments |Name| Type |Description| |---|---|---| | `time` | `TIMESTAMPTZ` | Time column to aggregate. | | `value` | `DOUBLE PRECISION` | Value column to aggregate. |
### Returns |Column|Type|Description| |---|---|---| | `timevector` | `Timevector` | A timevector object which can be efficiently used by any of our timevector operations. |
### Sample Usages For this example, assume we have a table 'samples' with two columns, 'time' and 'weight'. The following will return that table as a timevector. ```SQL ,ignore SELECT timevector(time, weight) FROM samples; ``` --- ## **rollup (summary form)** ```SQL ,ignore rollup( series timevector ) RETURNS timevector ``` This will combine multiple already constructed timevectors. This is very useful for re-aggregating series already constructed using the [point form](#timevector). ### Required Arguments |Name| Type |Description| |---|---|---| | `series` | `timevector` | Previously constructed timevector objects. |
### Returns |Column|Type|Description| |---|---|---| | `timevector` | `timevector` | A timevector combining all the underlying series. |
### Sample Usages This example assumes a table 'samples' with columns 'time', 'data', and 'batch'. We can create a view containing timevector for each batch like so: ```SQL ,ignore CREATE VIEW series AS SELECT batch, timevector(time, data) as batch_series FROM samples GROUP BY batch; ``` If we want to operate over the combination of all batches, we can get the timevector for this as follows: ```SQL ,ignore SELECT rollup(batch_series) FROM series; ``` --- ## **unnest** ```SQL ,ignore unnest( series timevector ) RETURNS TABLE("time" timestamp with time zone, value double precision) ``` The unnest function is used to get the (time, value) pairs back out of a timevector object. ### Required Arguments |Name|Type|Description| |---|---|---| | `series` | `timevector` | The series to return the data from. |
### Returns |Column|Type|Description| |---|---|---| | `unnest` | `TABLE` | The (time,value) records contained in the timevector. |
### Sample Usage ```SQL SELECT unnest( (SELECT timevector(a.time, a.value) FROM (SELECT time, value FROM toolkit_experimental.generate_periodic_normal_series('2020-01-01 UTC'::timestamptz, 45654)) a) ) LIMIT 10; ``` ```output unnest ----------------------------------------------- ("2020-01-01 00:00:00+00",1009.8399687963981) ("2020-01-01 00:10:00+00",873.6326953620166) ("2020-01-01 00:20:00+00",1045.8138997857413) ("2020-01-01 00:30:00+00",1075.472021940188) ("2020-01-01 00:40:00+00",956.0229773008177) ("2020-01-01 00:50:00+00",878.215079403259) ("2020-01-01 01:00:00+00",1067.8120522056508) ("2020-01-01 01:10:00+00",1102.3464544566375) ("2020-01-01 01:20:00+00",952.9509636893868) ("2020-01-01 01:30:00+00",1031.9006507123047) ``` ================================================ FILE: docs/timeseries_pipeline_elements.md ================================================ # Timevector Pipelines [experimental](/docs/README.md#tag-notes) > [Description](#timevector-pipeline-description)
> [Example](#timevector-pipeline-example)
> [Pipeline Elements](#timevector-pipeline-elements) ## Description Timescale timevector objects are just a convenient and efficient way of tracking a single value over time and are detailed a bit more [here](timevector.md). One of our primary goals with timevector is that they should be easy and efficient to perform basic operations on, and that is where pipelines enter the picture. At its simplest, a pipeline is just a timevector connected to a [pipeline element](#timevector-pipeline-elements) via the pipeline operator `->`. However, most pipeline operations output new timevector, so it's possible to chain many pipeline elements together such that the output from one element becomes the input to the next. ### A note on operator associativity and grouping Due to limitations in the PostgreSQL parser, custom operators are required to be left associative. The following pipeline will always result in `elementA` being applied to `timevector` and then `elementB` being applied to the result. ```SQL ,ignore SELECT timevector -> elementA -> elementB; ``` However, it is possible to explicitly group elements using parentheses: ```SQL ,ignore SELECT timevector -> (elementA -> elementB); ``` This will result in a pipeline object being created from elements A and B, which will then be applied to the timevector. While we don't presently take maximum advantage of this internally, these multiple element pipelines should enable optimizations moving forward. Therefore, this second form should be preferred where possible. ## Usage Example For this example let's start with a table of temperatures collected from different devices at different times. ```SQL ,non-transactional,ignore-output SET TIME ZONE 'UTC'; CREATE TABLE test_data(time TIMESTAMPTZ, device INTEGER, temperature DOUBLE PRECISION); ``` In order to have some nominally interesting data to look at, let's populate this table with random data covering 30 days of readings over 10 devices. 
```SQL ,non-transactional,ignore-output INSERT INTO test_data SELECT '2020-01-01 00:00:00+00'::timestamptz + ((test_random() * 2592000)::int * '1 second'::interval), floor(test_random() * 10 + 1), 50 + test_random() * 20 FROM generate_series(1,10000); ``` Now suppose we want to know how much the temperature fluctuates on a daily basis for each device. Using timevector and pipelines can simplify the process of finding the answer: ```SQL ,non-transactional,ignore-output CREATE VIEW daily_delta AS SELECT device, timevector(time, temperature) -> (toolkit_experimental.sort() -> delta()) AS deltas FROM test_data GROUP BY device; ``` This command creates a timevector from the time and temperature columns (grouped by device), sorts them in increasing time, and computes the deltas between values. Now we can look at the deltas for a specific device. Note that the output for this test is inaccurate as we've removed some of the pipeline elements for the moment. ```SQL,ignore-output SELECT time, value::numeric(4,2) AS delta FROM unnest((SELECT deltas FROM daily_delta WHERE device = 3)); ``` ```output time | delta ------------------------+------- 2020-01-02 00:00:00+00 | -0.54 2020-01-03 00:00:00+00 | 0.29 2020-01-04 00:00:00+00 | -0.25 2020-01-05 00:00:00+00 | 0.07 2020-01-06 00:00:00+00 | 0.80 2020-01-07 00:00:00+00 | -0.27 2020-01-08 00:00:00+00 | -2.55 2020-01-09 00:00:00+00 | 3.51 2020-01-10 00:00:00+00 | -0.78 2020-01-11 00:00:00+00 | -0.39 2020-01-12 00:00:00+00 | 0.55 2020-01-13 00:00:00+00 | -0.87 2020-01-14 00:00:00+00 | 1.17 2020-01-15 00:00:00+00 | -2.49 2020-01-16 00:00:00+00 | 0.10 2020-01-17 00:00:00+00 | 1.09 2020-01-18 00:00:00+00 | -0.09 2020-01-19 00:00:00+00 | 1.14 2020-01-20 00:00:00+00 | -1.23 2020-01-21 00:00:00+00 | -0.29 2020-01-22 00:00:00+00 | -0.37 2020-01-23 00:00:00+00 | 1.48 2020-01-24 00:00:00+00 | -0.52 2020-01-25 00:00:00+00 | 1.34 2020-01-26 00:00:00+00 | -0.95 2020-01-27 00:00:00+00 | -0.65 2020-01-28 00:00:00+00 | -0.42 2020-01-29 00:00:00+00 
| 1.42 2020-01-30 00:00:00+00 | -0.66 ``` Or even run one of our device's deltas through lttb to get a nice graphable set of points: ```SQL SELECT (deltas -> toolkit_experimental.lttb(10))::TEXT FROM daily_delta where device = 7; ``` ```output text ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- (version:1,num_points:10,flags:1,internal_padding:(0,0,0),points:[(ts:"2020-01-01 23:45:36+00",val:0),(ts:"2020-01-02 00:28:48+00",val:0.01999999999999602),(ts:"2020-01-02 17:45:36+00",val:0.020000000000003126),(ts:"2020-01-02 17:45:36+00",val:0),(ts:"2020-01-03 03:07:12+00",val:0.020000000000003126),(ts:"2020-01-03 20:24:00+00",val:0.01999999999999602),(ts:"2020-01-03 20:24:00+00",val:0),(ts:"2020-01-04 05:45:36+00",val:0.020000000000003126),(ts:"2020-01-04 23:02:24+00",val:0.020000000000003126),(ts:"2020-01-04 23:02:24+00",val:0)],null_val:[0,0]) ``` ## Current Pipeline Elements(A-Z) As of the current timescale release, these elements are all [experimental](/docs/README.md#tag-notes). > - [delta](#timevector_pipeline_delta) > - [lttb](#timevector_pipeline_lttb) > - [sort](#sort) --- ## **delta** ```SQL ,ignore delta( ) RETURNS TimevectorPipelineElement ``` This element will return a new timevector where each point is the difference between the current and preceding value in the input timevector. The new series will be one point shorter as it will not have a preceding value to return a delta for the first point. 
### Required Arguments |Name| Type |Description| |---|---|---|
### Pipeline Execution Returns |Column|Type|Description| |---|---|---| | `timevector` | `Timevector` | The result of applying this pipeline element will be a new time series where each point contains the difference in values from the prior point in the input timevector. |
### Sample Usage ```SQL SELECT time, value FROM unnest( (SELECT timevector('2020-01-01'::timestamptz + step * '1 day'::interval, step * step) -> delta() FROM generate_series(1, 5) step) ); ``` ```output time | value ------------------------+------- 2020-01-03 00:00:00+00 | 3 2020-01-04 00:00:00+00 | 5 2020-01-05 00:00:00+00 | 7 2020-01-06 00:00:00+00 | 9 ``` --- ## **lttb** ```SQL ,ignore lttb( resolution int, ) RETURNS TimevectorPipelineElement ``` This element will return a [largest triangle three buckets](lttb.md#description) approximation of a given timevector. Its behavior is the same as the lttb function documented [here](lttb.md#lttb), save that it expects the series to be sorted. ```SQL ,ignore SELECT lttb(time, value, 40) FROM data; ``` is equivalent to ```SQL ,ignore SELECT timevector(time, value) -> sort() -> lttb() FROM data; ``` ### Required Arguments |Name| Type |Description| |---|---|---| | `resolution` | `INTEGER` | Number of points the output should have. |
### Pipeline Execution Returns |Column|Type|Description| |---|---|---| | `timevector` | `Timevector` | The result of applying this pipeline element will be a new timevector with `resolution` points that is visually similar to the input series. |
### Sample Usage ```SQL SELECT time, value FROM unnest( (SELECT timevector('2020-01-01 UTC'::TIMESTAMPTZ + make_interval(days=>(foo*10)::int), 10 + 5 * cos(foo)) -> toolkit_experimental.lttb(4) FROM generate_series(1,11,0.1) foo) ); ``` ```output time | value ------------------------+-------------------- 2020-01-11 00:00:00+00 | 12.7015115293407 2020-02-01 00:00:00+00 | 5.004324248633603 2020-03-03 00:00:00+00 | 14.982710485116087 2020-04-20 00:00:00+00 | 10.022128489940254 ``` --- ## **sort** ```SQL ,ignore sort( ) RETURNS TimevectorPipelineElement ``` This element takes in a timevector and returns a timevector consisting of the same points, but in order of increasing time values. ### Required Arguments |Name| Type |Description| |---|---|---|
### Pipeline Execution Returns |Column|Type|Description| |---|---|---| | `timevector` | `Timevector` | The result of applying this pipeline element will be a time sorted version of the incoming timevector. |
### Sample Usage ```SQL SELECT time, value FROM unnest( (SELECT timevector('2020-01-06'::timestamptz - step * '1 day'::interval, step * step) -> toolkit_experimental.sort() FROM generate_series(1, 5) step) ); ``` ```output time | value ------------------------+------- 2020-01-01 00:00:00+00 | 25 2020-01-02 00:00:00+00 | 16 2020-01-03 00:00:00+00 | 9 2020-01-04 00:00:00+00 | 4 2020-01-05 00:00:00+00 | 1 ``` --- ================================================ FILE: docs/two-step_aggregation.md ================================================ # Two-Step Aggregation - What It Is and Why We Use It ## What is a Two-Step Aggregate You may have noticed that many of our aggregate functions have two parts to them; first an aggregation step and then second an accessor. For instance: ```SQL , ignore SELECT average(time_weight('LOCF', value)) as time_weighted_average FROM foo; -- or SELECT approx_percentile(0.5, percentile_agg(value)) as median FROM bar; ``` In each case there is an inner aggregate function (`time_weight` / `percentile_agg`) and an outer call to an accessor function (`average` / `approx_percentile`). We use this calling convention in multiple places throughout the TimescaleDB Toolkit project. The inner aggregate call creates a machine-readable partial form that can be used for multiple purposes. The two-step calling convention is slightly longer than a hypothetical one-step one where we just called `time_weighted_average('LOCF', value)` or `percentile_agg(0.5, val)` directly (these functions don't exist, don't try to use them). While the one-step calling convention is easier for the simple case, it becomes much more difficult and hard to reason about for slightly more complex use-cases detailed in the next section. We wanted the calling convention to remain consistent and easy to reason about so you can take advantage of the same functions even as you start doing more complicated analyses. 
This also keeps the docs consistent and prevents adding special cases everywhere. ## Why We Use Two-Step Aggregates Interestingly, almost all Postgres aggregates do a version of this [under the hood already](https://www.postgresql.org/docs/current/xaggr.html), where they have an internal state used for aggregation and then a final function that displays the output to the user. So why do we make this calling convention explicit? 1) It allows different accessor function calls to use the same internal state and not redo work. 2) It cleanly distinguishes the parameters that affect the aggregate and those that only affect the accessor. 3) It makes it explicit how and when aggregates can be re-aggregated or "stacked" on themselves with logically consistent results. This also helps them better integrate with [continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates). 4) It allows for better retrospective analysis of downsampled data in [continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates). That might have been gibberish to some, so let's unpack it a bit. ### Accessor functions with additional parameters The way the optimizer works, if you run an aggregate like: ```SQL , ignore SELECT avg(val), sum(val), count(val) FROM foo; ``` The internal state of the `avg` is actually the `sum` and the `count` and it just returns `sum / count` in the final step of the aggregate. The optimizer knows, when these functions are used, that it doesn't need to run separate aggregates for each, it can use the same internal function and extract the results it needs. This is great! It can save a lot of work. 
The problem comes when we do something like `percentile_agg` where we have multiple `approx_percentiles` ie: ```SQL , ignore SELECT approx_percentile(0.1, percentile_agg(val)) as p10, approx_percentile(0.5, percentile_agg(val)) as p50, approx_percentile(0.9, percentile_agg(val)) as p90 FROM foo; ``` Because the aggregate step is the same for all three of the calls, the optimizer can combine all the calls, or I can do so explicitly: ```SQL , ignore WITH pct as (SELECT percentile_agg(val) as approx FROM foo) SELECT approx_percentile(0.1, approx) as p10, approx_percentile(0.5, approx) as p50, approx_percentile(0.9, approx) as p90 FROM pct; ``` But the work done in each case will be the same. If we were to use the one-step calling convention, the extra input of the percentile we're trying to extract would completely confuse the optimizer, and it would have to redo all the calculation inside the aggregate for each of the values you wanted to extract. So, if it were framed like this: ```SQL , ignore -- NB: THIS IS AN EXAMPLE OF AN API WE DECIDED NOT TO USE, IT DOES NOT WORK SELECT approx_percentile(0.1, val) as p10, approx_percentile(0.5, val) as p50, approx_percentile(0.9, val) as p90 FROM foo; ``` the optimizer would be forced to build up the necessary internal state three times rather than just once. This is even more apparent when you want to use multiple accessor functions, which may have different numbers or types of inputs: ```SQL , ignore SELECT approx_percentile(0.1, percentile_agg(val)) as p10, approx_percentile(0.5, percentile_agg(val)) as p50, approx_percentile(0.9, percentile_agg(val)) as p90, error(percentile_agg(val)), approx_percentile_rank(10000, percentile_agg(val)) as percentile_at_threshold FROM foo; ``` The optimizer can easily optimize away the redundant `percentile_agg(val)` calls, but would have much more trouble in the one-step approach. 
### Explicit association of parameters with either the aggregation or access step This leads us to our second benefit of the two-step approach. A number of our accessor functions (both completed and planned) take inputs that don't affect how we aggregate the underlying data, but do affect how we extract data from the computed aggregate. If we combine everything into one function, it makes it less clear which is which. Now, our `percentile_agg` implementation uses the `uddsketch` algorithm under the hood and has some default values for parameters, namely the number of buckets it stores and the target error, but there are cases where we might want to use the full algorithm with custom parameters like so: ```SQL , ignore SELECT approx_percentile(0.5, uddsketch(1000, 0.001, val)) as median, -- 1000 buckets, 0.001 relative error target approx_percentile(0.9, uddsketch(1000, 0.001, val)) as p90, approx_percentile(0.5, uddsketch(100, 0.01, val)) as less_accurate_median -- modify the terms for the aggregate get a new approximation FROM foo; ``` Here we can see which parameters are for the `uddsketch` aggregate (the number of buckets and the target error), and which arguments are for`approx_percentile` (the approx_percentile we want to extract). The optimizer will correctly combine the calls for the first two `uddsketch` calls but not for the third. It is also more clear to the user what is going on, and that I can't set my target error at read time, but rather only at calculation time (this is especially helpful for understanding the behavior of [continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates)). Combining all of these into one function, so we can use the one-step approach, can get unwieldy and unclear very quickly (ie imagine something like `approx_percentile_uddsketch(0.5, 1000, 0.001)`).
### Stacked aggregates and [continuous aggregate](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates) integration Aggregates can be divided into two classes: ones that are "stackable" in their final form and ones that are not. What I'm calling stackable aggregates are ones like `sum`, `min`, `max` etc. that can be re-aggregated on themselves at different groupings without losing their meaning, ie: ```SQL , ignore SELECT sum(val) FROM foo; -- is equivalent to: SELECT sum(sum) FROM (SELECT id, sum(val) FROM foo GROUP BY id) s ``` A non-stackable aggregate like `avg` doesn't have this property: ```SQL , ignore SELECT avg(val) FROM foo; -- is NOT equivalent to: SELECT avg(avg) FROM (SELECT id, avg(val) FROM foo GROUP BY id) s; ``` Or to say it more succinctly: the `sum` of a `sum` is the `sum` but the `avg` of an `avg` is not the `avg`. This is the difference between stackable and non-stackable aggregates. This is not to say that the `avg` of an `avg` is not a useful piece of information, it can be in some cases, but it isn't always what you want and it can be difficult to actually get the true value for non-stackable aggregates, for instance, for `avg` we can take the `count` and `sum` and divide the `sum` by the `count`, but for many aggregates this is not so obvious and for something like `percentile_agg` __LINK__ with a one-step aggregate, the user would simply have to re-implement most of the algorithm in SQL in order to get the result they want. Two-step aggregates expose the internal, re-aggregateable form to the user so they can much more easily do this work, so we've tried to provide two-step aggregates wherever we can. 
This is especially useful for working with [continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates), so if I create a continuous aggregate like so: ```SQL , ignore CREATE MATERIALIZED VIEW foo_15 WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT id, time_bucket('15 min'::interval, ts) as bucket, sum(val), percentile_agg(val) FROM foo GROUP BY id, time_bucket('15 min'::interval, ts); ``` And I want to do a second level of aggregation, say over a day, I can do it over the resulting aggregate with the `percentile_agg` function: ```SQL , ignore SELECT id, time_bucket('1 day'::interval, bucket) as bucket, sum(sum), approx_percentile(0.5, percentile_agg(percentile_agg)) as median FROM foo_15 GROUP BY id, time_bucket('1 day'::interval, bucket) ``` ##### NB: There are some two-step aggregates like `tdigest` __ADD LINK? and expose and other bits...__ when we document that function where two-step aggregation can lead to more error or different results, because the algorithm is not deterministic in its re-aggregation, but we will note that clearly in the documentation when that happens, it is unusual. ### Retrospective analysis over downsampled data [Continuous aggregates](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates) (or separate aggregation tables powered by a cron job or [user-defined action]( __LINK__ ) ) aren't just used for speeding up queries, they're also used for [data retention]( __LINK__ ). But this can mean that they are very difficult to modify as your data ages. Unfortunately this is also when you are learning more things about the analysis you want to do on your data. By keeping them in their raw aggregate form, the user has the flexibility to apply different accessors to do retrospective analysis. 
With a one-step aggregate the user needs to determine, say, which percentiles are important when we create the continuous aggregate; with a two-step aggregate the user can simply decide that they're going to want an approximate percentile, and then choose when doing the analysis whether they want the median, the 90th, 95th or 1st percentile. No need to modify the aggregate or try to re-calculate from data that may no longer exist in the system. ================================================ FILE: docs/uddsketch.md ================================================ # UddSketch > [Description](#uddsketch-description)
> [Details](#uddsketch-details)
> [Example](#uddsketch-example)
> [Example in a Continuous Aggregate](#uddsketch-cagg-example)
> [API](#uddsketch-api) ## Description [UddSketch](https://arxiv.org/pdf/2004.08604.pdf) is a specialization of the [DDSketch](https://arxiv.org/pdf/1908.10693.pdf) data structure. It follows the same approach of breaking the data range into a series of logarithmically sized buckets such that it can guarantee a maximum relative error for any percentile estimate as long as it knows which bucket that percentile falls in. Where UddSketch differs from DDSketch is in its behavior when the number of buckets required by a set of values exceeds some predefined maximum. In these circumstances DDSketch will maintain its original error bound, but only for a subset of the range of percentiles. UddSketch, on the other hand, will combine buckets in such a way that it loosens the error bound, but can still estimate all percentile values. As an example, assume both sketches were trying to capture a large set of values to be able to estimate percentiles with 1% relative error but were given too few buckets to do so. The DDSketch implementation would still guarantee 1% relative error, but may only be able to provide estimates in the range (0.05, 0.95). The UddSketch implementation, however, might end up only able to guarantee 2% relative error, but would still be able to estimate all percentiles at that error. ## Details Timescale's UddSketch implementation is provided as an aggregate function in PostgreSQL. It does not support moving-aggregate mode, and is not an ordered-set aggregate. It currently only works with `DOUBLE PRECISION` types, but we're intending to relax this constraint as needed. UddSketches are partializable and are good candidates for [continuous aggregation](https://docs.timescale.com/latest/using-timescaledb/continuous-aggregates). It's also worth noting that attempting to set the relative error too small or large can result in breaking behavior. For this reason, the error is required to fall into the range [1.0e-12, 1.0).
## Usage Example For this example we're going to start with a table containing some NOAA weather data for a few weather stations across the US over the past 20 years. ```SQL ,ignore \d weather; ``` ``` Table "public.weather" Column | Type | Collation | Nullable | Default ---------+-----------------------------+-----------+----------+--------- station | text | | | name | text | | | date | timestamp without time zone | | | prcp | double precision | | | snow | double precision | | | tavg | double precision | | | tmax | double precision | | | tmin | double precision | | | ``` Now let's create some UddSketches for our different stations and verify that they're receiving data. ```SQL ,ignore CREATE VIEW daily_rain AS SELECT name, uddsketch(100, 0.005, prcp) FROM weather GROUP BY name; SELECT name, num_vals(uddsketch), error(uddsketch) FROM daily_rain; ``` ``` name | num_vals | error ---------------------------------------+-----------+--------------------- PORTLAND INTERNATIONAL AIRPORT, OR US | 7671 | 0.0199975003624472 LITCHFIELD PARK, AZ US | 5904 | 0.005 NY CITY CENTRAL PARK, NY US | 7671 | 0.03997901311671962 MIAMI INTERNATIONAL AIRPORT, FL US | 7671 | 0.03997901311671962 (4 rows) ``` Notice that 100 buckets proved to be insufficient to maintain 0.5% relative error for three of our data sets, but they've automatically adjusted their bucket size to maintain the desired bucket limit. We can then check some rainfall percentiles to see how our stations compare. 
```SQL ,ignore SELECT name, approx_percentile(0.6, uddsketch) FROM daily_rain; ``` ``` name | approx_percentile ---------------------------------------+---------------------- PORTLAND INTERNATIONAL AIRPORT, OR US | 0.009850446542334412 LITCHFIELD PARK, AZ US | 0 NY CITY CENTRAL PARK, NY US | 0 MIAMI INTERNATIONAL AIRPORT, FL US | 0 (4 rows) ``` ```SQL ,ignore SELECT name, approx_percentile(0.9, uddsketch) FROM daily_rain; ``` ``` name | approx_percentile ---------------------------------------+-------------------- PORTLAND INTERNATIONAL AIRPORT, OR US | 0.3072142710699281 LITCHFIELD PARK, AZ US | 0 NY CITY CENTRAL PARK, NY US | 0.4672895773464223 MIAMI INTERNATIONAL AIRPORT, FL US | 0.5483701300878486 (4 rows) ``` ```SQL ,ignore SELECT name, approx_percentile( 0.995, uddsketch) FROM daily_rain; ``` ``` name | approx_percentile ---------------------------------------+-------------------- PORTLAND INTERNATIONAL AIRPORT, OR US | 1.1969797510556823 LITCHFIELD PARK, AZ US | 0.7671946655927083 NY CITY CENTRAL PARK, NY US | 2.3145312888530807 MIAMI INTERNATIONAL AIRPORT, FL US | 2.9423518191328113 (4 rows) ``` ## Example Using TimeScale Continuous Aggregates To have a UddSketch over a PostgresQL table which automatically updates as more data is added, we can make use of continuous aggregates. 
First, let us create a simple hypertable: ```SQL ,non-transactional,ignore-output SET TIME ZONE 'UTC'; CREATE TABLE test(time TIMESTAMPTZ, value DOUBLE PRECISION); SELECT create_hypertable('test', 'time'); ``` Now we'll create a continuous aggregate which will group all the points for each week into a UddSketch: ```SQL ,non-transactional,ignore-output CREATE MATERIALIZED VIEW weekly_sketch WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('7 day'::interval, time) as week, uddsketch(100, 0.005, value) as sketch FROM test GROUP BY time_bucket('7 day'::interval, time); ``` Next we'll use one of our utility functions, `generate_periodic_normal_series`, to add some data to the table. Using default arguments, this function will add 28 days of data points at 10 minute intervals. ```SQL ,non-transactional INSERT INTO test SELECT time, value FROM toolkit_experimental.generate_periodic_normal_series('2020-01-01 UTC'::timestamptz, rng_seed => 12345678); ``` ``` INSERT 0 4032 ``` Finally, we can query the aggregate to see various approximate percentiles from different weeks. 
```SQL SELECT week, error(sketch), approx_percentile(0.01, sketch) AS low, approx_percentile(0.5, sketch) AS mid, approx_percentile(0.99, sketch) AS high FROM weekly_sketch ORDER BY week; ``` ```output week | error | low | mid | high ------------------------+-------+-------------------+--------------------+-------------------- 2019-12-30 00:00:00+00 | 0.005 | 808.3889305072331 | 1037.994095858188 | 1280.5527834239035 2020-01-06 00:00:00+00 | 0.005 | 858.3773394302965 | 1091.213645863754 | 1306.4218833642865 2020-01-13 00:00:00+00 | 0.005 | 816.5134423716273 | 1058.9631440308738 | 1293.4226606442442 2020-01-20 00:00:00+00 | 0.005 | 731.4599430896668 | 958.188678537264 | 1205.9785918127336 2020-01-27 00:00:00+00 | 0.005 | 688.8626877028054 | 911.4568854686239 | 1135.7472981488002 ``` We can also combine the weekly aggregates to run queries on the entire data: ```SQL SELECT error(a.uddsketch), approx_percentile(0.01, a.uddsketch) AS low, approx_percentile(0.5, a.uddsketch) AS mid, approx_percentile(0.99, a.uddsketch) AS high FROM (SELECT rollup(sketch) as uddsketch FROM weekly_sketch) AS a; ``` ```output error | low | mid | high -------+------------------+--------------------+-------------------- 0.005 | 753.736403199032 | 1027.6657963969128 | 1280.5527834239035 ``` ## Command List (A-Z) Aggregate Functions > - [uddsketch - point form](#uddsketch-point) > - [uddsketch - summary form](#uddsketch-summary) Accessor Functions > - [approx_percentile](#approx_percentile) > - [approx_percentile_rank](#approx_percentile_rank) > - [error](#error) > - [mean](#mean) > - [num_vals](#num-vals) --- ## **uddsketch (point form) ** ```SQL ,ignore uddsketch( size INTEGER, max_error DOUBLE PRECISION, value DOUBLE PRECISION ) RETURNS UddSketch ``` This will construct and return a new UddSketch with at most `size` buckets. The maximum relative error of the UddSketch will be bounded by `max_error` unless it is impossible to do so while with the bucket bound. 
If the sketch has had to combine buckets, the new error can be found with the [uddsketch_error](#error) command. Note that since the error will be increased automatically (roughly doubling at each step) as the number of buckets is exceeded, it is probably worth erring on the side of too small unless you have a good understanding of exactly what your error should be. ### Required Arguments |Name| Type |Description| |---|---|---| | `size` | `INTEGER` | Maximum number of buckets in the sketch. Providing a larger value here will make it more likely that the aggregate will able to maintain the desired error, though will potentially increase the memory usage. | | `max_error` | `DOUBLE PRECISION` | This is the starting maximum relative error of the sketch, as a multiple of the actual value. The true error may exceed this if too few buckets are provided for the data distribution. | | `value` | `DOUBLE PRECISION` | Column to aggregate.
### Returns |Column|Type|Description| |---|---|---| | `uddsketch` | `UddSketch` | A UddSketch object which may be passed to other UddSketch APIs. |
### Sample Usages For this example assume we have a table 'samples' with a column 'data' holding `DOUBLE PRECISION` values. The following will simply return a sketch over that column ```SQL ,ignore SELECT uddsketch(100, 0.01, data) FROM samples; ``` It may be more useful to build a view from the aggregate that we can later pass to other uddsketch functions. ```SQL ,ignore CREATE VIEW sketch AS SELECT uddsketch(100, 0.01, data) FROM samples; ``` --- ## **rollup (summary form)** ```SQL ,ignore rollup( sketch uddsketch ) RETURNS UddSketch ``` This will combine multiple already constructed UddSketches, they must have the same size in order to be combined. This is very useful for re-aggregating already constructed uddsketches using the [point form](#uddsketch-point). ### Required Arguments |Name| Type |Description| |---|---|---| | `sketch` | `UddSketch` | The already constructed uddsketch from a previous [uddsketch() (point form)](#uddsketch-point) call. |
### Returns |Column|Type|Description| |---|---|---| | `uddsketch` | `UddSketch` | A UddSketch object which may be passed to other UddSketch APIs. |
### Sample Usages For this example assume we have a table 'samples' with a column 'data' holding `DOUBLE PRECISION` values, and an 'id' column that holds the what series the data belongs to, we can create a view to get the UddSketches for each `id` using the [point form](#uddsketch-point) like so: ```SQL ,ignore CREATE VIEW sketch AS SELECT id, uddsketch(100, 0.01, data) as sketched FROM samples GROUP BY id; ``` Then we can use that view to get the full aggregate like so: ```SQL ,ignore SELECT rollup(sketched) FROM sketch; ``` --- ## **approx_percentile** ```SQL ,ignore approx_percentile( percentile DOUBLE PRECISION, sketch uddsketch ) RETURNS DOUBLE PRECISION ``` Get the approximate value at a percentile from a UddSketch. ### Required Arguments |Name|Type|Description| |---|---|---| | `percentile` | `DOUBLE PRECISION` | The desired percentile (0.0-1.0) to approximate. | | `sketch` | `UddSketch` | The sketch to compute the approx_percentile on. |
### Returns |Column|Type|Description| |---|---|---| | `approx_percentile` | `DOUBLE PRECISION` | The estimated value at the requested percentile. |
### Sample Usage ```SQL SELECT approx_percentile( 0.90, uddsketch(100, 0.01, data) ) FROM generate_series(1, 100) data; ``` ```output approx_percentile -------------------- 90.93094205022494 ``` --- ## **approx_percentile_rank** ```SQL ,ignore approx_percentile_rank( value DOUBLE PRECISION, sketch UddSketch ) RETURNS UddSketch ``` Estimate what percentile a given value would be located at in a UddSketch. ### Required Arguments |Name|Type|Description| |---|---|---| | `value` | `DOUBLE PRECISION` | The value to estimate the percentile of. | | `sketch` | `UddSketch` | The sketch to compute the percentile on. |
### Returns |Column|Type|Description| |---|---|---| | `approx_percentile_rank` | `DOUBLE PRECISION` | The estimated percentile associated with the provided value. |
### Sample Usage ```SQL SELECT approx_percentile_rank( 90, uddsketch(100, 0.01, data) ) FROM generate_series(1, 100) data; ``` ```output approx_percentile_rank ------------------- 0.89 ``` --- ## **error** ```SQL ,ignore error(sketch UddSketch) RETURNS DOUBLE PRECISION ``` This returns the maximum relative error that a percentile estimate will have (relative to the correct value). This will initially be the same as the `max_error` used to construct the UddSketch, but if the sketch has needed to combine buckets this function will return the new maximum error. ### Required Arguments |Name|Type|Description| |---|---|---| | `sketch` | `UddSketch` | The sketch to determine the error of. |
### Returns |Column|Type|Description| |---|---|---| | `error` | `DOUBLE PRECISION` | The maximum relative error of any percentile estimate. |
### Sample Usages ```SQL SELECT error( uddsketch(100, 0.01, data) ) FROM generate_series(1, 100) data; ``` ```output error ------- 0.01 ``` --- ## **mean** ```SQL ,ignore mean(sketch UddSketch) RETURNS DOUBLE PRECISION ``` Get the average of all the values contained in a UddSketch. ### Required Arguments |Name|Type|Description| |---|---|---| | `sketch` | `UddSketch` | The sketch to extract the mean value from. |
### Returns |Column|Type|Description| |---|---|---| | `mean` | `DOUBLE PRECISION` | The average of the values entered into the UddSketch. |
### Sample Usage ```SQL SELECT mean( uddsketch(100, 0.01, data) ) FROM generate_series(1, 100) data; ``` ```output mean ------ 50.5 ``` --- ## **num_vals** ```SQL ,ignore num_vals(sketch UddSketch) RETURNS DOUBLE PRECISION ``` Get the number of values contained in a UddSketch. ### Required Arguments |Name|Type|Description| |---|---|---| | `sketch` | `UddSketch` | The sketch to extract the number of values from. |
### Returns |Column|Type|Description| |---|---|---| | `uddsketch_count` | `DOUBLE PRECISION` | The number of values entered into the UddSketch. |
### Sample Usage ```SQL SELECT num_vals( uddsketch(100, 0.01, data) ) FROM generate_series(1, 100) data; ``` ```output num_vals ----------- 100 ``` --- ================================================ FILE: extension/.gitignore ================================================ .DS_Store .idea/ .vscode/ /target *.iml **/*.rs.bk sql/*.generated.sql ================================================ FILE: extension/Cargo.toml ================================================ [package] name = "timescaledb_toolkit" version = "1.22.0-dev" edition = "2021" [[bin]] name = "pgrx_embed_timescaledb_toolkit" path = "./src/bin/pgrx_embed.rs" [lib] crate-type = ["cdylib", "lib"] [features] default = ["pg18"] pg15 = ["pgrx/pg15", "pgrx-tests/pg15"] pg16 = ["pgrx/pg16", "pgrx-tests/pg16"] pg17 = ["pgrx/pg17", "pgrx-tests/pg17"] pg18 = ["pgrx/pg18", "pgrx-tests/pg18"] pg_test = ["approx"] [dependencies] # Keep synchronized with `cargo install --version N.N.N cargo-pgrx` in Readme.md and docker/ci/Dockerfile # Also `pgrx-tests` down below in `dev-dependencies`. 
pgrx = "=0.16.1" pgrx-macros = "=0.16.1" pgrx-sql-entity-graph = "=0.16.1" encodings = {path="../crates/encodings"} flat_serialize = {path="../crates/flat_serialize/flat_serialize"} flat_serialize_macro = {path="../crates/flat_serialize/flat_serialize_macro"} tdigest = {path="../crates/t-digest"} hyperloglogplusplus = {path="../crates/hyperloglogplusplus"} uddsketch = {path="../crates/udd-sketch"} counter-agg = {path="../crates/counter-agg"} stats_agg = {path="../crates/stats-agg"} time_weighted_average = {path="../crates/time-weighted-average"} tspoint = {path="../crates/tspoint"} asap = {path="../crates/asap"} countminsketch = {path="../crates/count-min-sketch"} aggregate_builder = {path="../crates/aggregate_builder"} approx = {version = "0.4.0", optional = true} bincode = "1.3.1" serde = { version = "1.0", features = ["derive"] } once_cell = "1.8.0" ordered-float = {version = "1.0", features = ["serde"] } paste = "1.0" rand = { version = "0.8.3", features = ["getrandom", "small_rng"] } rand_distr = "0.4.0" rand_chacha = "0.3.0" ron="0.6.0" tera = { version = "1.17.0", default-features = false } twofloat = { version = "0.6.0", features = ["serde"] } num-traits = "0.2.15" pest = "=2.3.0" pest_derive = "=2.3.0" spfunc = "0.1.0" statrs = "0.15.0" [dev-dependencies] pgrx-tests = "=0.16.1" approx = "0.4.0" ================================================ FILE: extension/src/accessors/tests.rs ================================================ use pgrx::*; use super::accessor; //use crate::{accessor, build}; // TODO don't require that trailing comma accessor! { one_field(value: f64,) } accessor! 
{ two_fields(a: f64, b: i64,) } #[test] fn one_field_works() { let d: AccessorOneField = accessor_one_field(1.0); assert_eq!(1.0, d.value); } #[test] fn two_field_works() { let d: AccessorTwoFields = accessor_two_fields(1.0, 2); assert_eq!((1.0, 2), (d.a, d.b)); } ================================================ FILE: extension/src/accessors.rs ================================================ use pgrx::*; use counter_agg::range::I64Range; use crate::{build, flatten, pg_type, ron_inout_funcs}; macro_rules! accessor { ( $name: ident ( $($field:ident : $typ: tt),* $(,)? ) ) => { ::paste::paste! { $crate::pg_type!{ #[derive(Debug)] struct [] { $($field: $typ,)* } } $crate::ron_inout_funcs!([]); } accessor_fn_impl! { $name( $( $field: $typ),* ) } }; } macro_rules! accessor_fn_impl { ( $name: ident ( $( $field:ident : $typ: tt ),* $(,)? ) ) => { ::paste::paste!{ #[pg_extern(immutable, parallel_safe, name = "" $name "")] fn []( $( $field: $typ ),* ) -> [] { $crate::build! { [] { $( $field ),* } } } } }; } accessor! { approx_percentile( percentile: f64, ) } accessor! { approx_percentile_rank( value: f64, ) } accessor! { num_vals() } accessor! { mean() } accessor! { error() } accessor! { min_val() } accessor! { max_val() } accessor! { average() } accessor! { average_x() } accessor! { average_y() } accessor! { sum() } accessor! { sum_x() } accessor! { sum_y() } accessor! { slope() } accessor! { corr() } accessor! { intercept() } accessor! { x_intercept() } accessor! { determination_coeff() } accessor! { distinct_count() } accessor! { stderror() } accessor! { delta() } accessor! { time_delta() } accessor! { rate() } accessor! { irate_left() } accessor! { irate_right() } accessor! { idelta_left() } accessor! { idelta_right() } accessor! { num_elements() } accessor! { num_changes() } accessor! { num_resets() } accessor! { counter_zero_time() } accessor! { first_val() } accessor! { last_val() } accessor! { first_time() } accessor! { last_time() } accessor! { open() } accessor! 
{ close() } accessor! { high() } accessor! { low() } accessor! { open_time() } accessor! { high_time() } accessor! { low_time() } accessor! { close_time() } accessor! { live_ranges() } accessor! { dead_ranges() } accessor! { uptime() } accessor! { downtime() } accessor! { into_values() } accessor! { into_array() } accessor! { into_int_values() } accessor! { state_timeline() } accessor! { state_int_timeline() } accessor! { num_live_ranges() } accessor! { num_gaps() } accessor! { topn() } // The rest are more complex, with String or other challenges. Leaving alone for now. pg_type! { #[derive(Debug)] struct AccessorLiveAt { time: u64, } } ron_inout_funcs!(AccessorLiveAt); #[pg_extern(immutable, parallel_safe, name = "live_at")] pub fn accessor_live_at(ts: crate::raw::TimestampTz) -> AccessorLiveAt { unsafe { flatten! { AccessorLiveAt { time: ts.0.value() as u64, } } } } pg_type! { #[derive(Debug)] struct AccessorStdDev { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorStdDev); #[pg_extern(immutable, parallel_safe, name = "stddev")] pub fn accessor_stddev(method: default!(&str, "'sample'")) -> AccessorStdDev { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorStdDev { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorStdDevX { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorStdDevX); #[pg_extern(immutable, parallel_safe, name = "stddev_x")] pub fn accessor_stddev_x(method: default!(&str, "'sample'")) -> AccessorStdDevX { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorStdDevX { method: method_enum, } } } } pg_type! 
{ #[derive(Debug)] struct AccessorStdDevY { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorStdDevY); #[pg_extern(immutable, parallel_safe, name = "stddev_y")] pub fn accessor_stddev_y(method: default!(&str, "'sample'")) -> AccessorStdDevY { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorStdDevY { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorVariance { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorVariance); #[pg_extern(immutable, parallel_safe, name = "variance")] pub fn accessor_variance(method: default!(&str, "'sample'")) -> AccessorVariance { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorVariance { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorVarianceX { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorVarianceX); #[pg_extern(immutable, parallel_safe, name = "variance_x")] pub fn accessor_variance_x(method: default!(&str, "'sample'")) -> AccessorVarianceX { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorVarianceX { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorVarianceY { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorVarianceY); #[pg_extern(immutable, parallel_safe, name = "variance_y")] pub fn accessor_variance_y(method: default!(&str, "'sample'")) -> AccessorVarianceY { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorVarianceY { method: method_enum, } } } } pg_type! 
{ #[derive(Debug)] struct AccessorSkewness { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorSkewness); #[pg_extern(immutable, parallel_safe, name = "skewness")] pub fn accessor_skewness(method: default!(&str, "'sample'")) -> AccessorSkewness { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorSkewness { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorSkewnessX { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorSkewnessX); #[pg_extern(immutable, parallel_safe, name = "skewness_x")] pub fn accessor_skewness_x(method: default!(&str, "'sample'")) -> AccessorSkewnessX { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorSkewnessX { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorSkewnessY { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorSkewnessY); #[pg_extern(immutable, parallel_safe, name = "skewness_y")] pub fn accessor_skewness_y(method: default!(&str, "'sample'")) -> AccessorSkewnessY { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorSkewnessY { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorKurtosis { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorKurtosis); #[pg_extern(immutable, parallel_safe, name = "kurtosis")] pub fn accessor_kurtosis(method: default!(&str, "'sample'")) -> AccessorKurtosis { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorKurtosis { method: method_enum, } } } } pg_type! 
{ #[derive(Debug)] struct AccessorKurtosisX { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorKurtosisX); #[pg_extern(immutable, parallel_safe, name = "kurtosis_x")] pub fn accessor_kurtosis_x(method: default!(&str, "'sample'")) -> AccessorKurtosisX { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorKurtosisX { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorKurtosisY { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorKurtosisY); #[pg_extern(immutable, parallel_safe, name = "kurtosis_y")] pub fn accessor_kurtosis_y(method: default!(&str, "'sample'")) -> AccessorKurtosisY { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorKurtosisY { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorCovar { method: crate::stats_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorCovar); #[pg_extern(immutable, parallel_safe, name = "covariance")] pub fn accessor_covar(method: default!(&str, "'sample'")) -> AccessorCovar { let method_enum = crate::stats_agg::method_kind(method); unsafe { flatten! { AccessorCovar { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorExtrapolatedDelta { method: crate::counter_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorExtrapolatedDelta); #[pg_extern(immutable, parallel_safe, name = "extrapolated_delta")] pub fn accessor_extrapolated_delta(method: &str) -> AccessorExtrapolatedDelta { let method_enum = crate::counter_agg::method_kind(method); unsafe { flatten! { AccessorExtrapolatedDelta { method: method_enum, } } } } pg_type! 
{ #[derive(Debug)] struct AccessorExtrapolatedRate { method: crate::counter_agg::Method, } } //FIXME string IO ron_inout_funcs!(AccessorExtrapolatedRate); #[pg_extern(immutable, parallel_safe, name = "extrapolated_rate")] pub fn accessor_extrapolated_rate(method: &str) -> AccessorExtrapolatedRate { let method_enum = crate::counter_agg::method_kind(method); unsafe { flatten! { AccessorExtrapolatedRate { method: method_enum, } } } } pg_type! { #[derive(Debug)] struct AccessorWithBounds { lower: i64, upper: i64, range_null: u8, lower_present: u8, upper_present: u8, } } ron_inout_funcs!(AccessorWithBounds); #[pg_extern(immutable, parallel_safe, name = "with_bounds")] pub fn accessor_with_bounds(bounds: crate::raw::tstzrange) -> AccessorWithBounds { let range = unsafe { crate::range::get_range(bounds.0.cast_mut_ptr()) }; let mut accessor = build! { AccessorWithBounds { lower: 0, upper: 0, range_null: 0, lower_present: 0, upper_present: 0, } }; match range { None => accessor.range_null = 1, Some(range) => { if let Some(left) = range.left { accessor.lower_present = 1; accessor.lower = left; } if let Some(right) = range.right { accessor.upper_present = 1; accessor.upper = right; } } } accessor } impl AccessorWithBounds { pub fn bounds(&self) -> Option { if self.range_null != 0 { return None; } I64Range { left: (self.lower_present != 0).then(|| self.lower), right: (self.upper_present != 0).then(|| self.upper), } .into() } } pg_type! { #[derive(Debug)] struct AccessorUnnest { } } ron_inout_funcs!(AccessorUnnest); // Note that this should be able to replace the timescale_experimental.unnest function // and related object in src/timevector/pipeline/expansion.rs #[pg_extern(immutable, parallel_safe, name = "unnest")] pub fn accessor_unnest() -> AccessorUnnest { build! { AccessorUnnest { } } } pg_type! 
{
    #[derive(Debug)]
    struct AccessorIntegral {
        len: u8,
        bytes: [u8; 16],
    }
}

// FIXME string IO
ron_inout_funcs!(AccessorIntegral);

/// Build the `integral` accessor, storing the time-unit name inline.
///
/// The unit name is kept in a fixed 16-byte buffer, so its UTF-8 encoding
/// must fit in 16 bytes. `unit.len()` counts bytes, not characters, so the
/// error message reports bytes (the old message incorrectly said
/// "characters", which is misleading for multi-byte UTF-8 input).
#[pg_extern(immutable, parallel_safe, name = "integral")]
pub fn accessor_integral(unit: default!(&str, "'second'")) -> AccessorIntegral<'static> {
    let unit_bytes = unit.as_bytes();
    if unit_bytes.len() > 16 {
        pgrx::error!(
            "Time unit string too long: {} bytes (max 16)",
            unit_bytes.len()
        );
    }
    let mut bytes = [0u8; 16];
    bytes[..unit_bytes.len()].copy_from_slice(unit_bytes);
    unsafe {
        flatten! {
            AccessorIntegral {
                len: unit_bytes.len() as u8,
                bytes,
            }
        }
    }
}

// Note we also have a AccessorTopn which is similar to this but doesn't store the count
pg_type! {
    #[derive(Debug)]
    struct AccessorTopNCount {
        count: i64,
    }
}

ron_inout_funcs!(AccessorTopNCount);

/// Build the `topn` accessor carrying the requested result count.
#[pg_extern(immutable, parallel_safe, name = "topn")]
pub fn accessor_topn_count(count: i64) -> AccessorTopNCount<'static> {
    unsafe {
        flatten! {
            AccessorTopNCount {
                count
            }
        }
    }
}

pg_type! {
    #[derive(Debug)]
    struct AccessorMaxFrequencyInt {
        value: i64,
    }
}

ron_inout_funcs!(AccessorMaxFrequencyInt);

/// Build the `max_frequency` accessor for the given integer value.
#[pg_extern(immutable, parallel_safe, name = "max_frequency")]
pub fn accessor_max_frequency_int(value: i64) -> AccessorMaxFrequencyInt<'static> {
    unsafe {
        flatten! {
            AccessorMaxFrequencyInt {
                value
            }
        }
    }
}

pg_type! {
    #[derive(Debug)]
    struct AccessorMinFrequencyInt {
        value: i64,
    }
}

ron_inout_funcs!(AccessorMinFrequencyInt);

/// Build the `min_frequency` accessor for the given integer value.
#[pg_extern(immutable, parallel_safe, name = "min_frequency")]
pub fn accessor_min_frequency_int(value: i64) -> AccessorMinFrequencyInt<'static> {
    unsafe {
        flatten! {
            AccessorMinFrequencyInt {
                value
            }
        }
    }
}

pg_type!
{
    #[derive(Debug)]
    struct AccessorPercentileArray {
        len: u64,
        percentile: [f64; 32],
    }
}

ron_inout_funcs!(AccessorPercentileArray);

/// Build the `approx_percentiles` accessor from up to 32 requested
/// percentiles; the fixed-size array is zero-padded past `len`.
#[pg_extern(immutable, name = "approx_percentiles")]
pub fn accessor_percentiles(unit: Vec<f64>) -> AccessorPercentileArray<'static> {
    if unit.len() > 32 {
        pgrx::error!("Too many percentiles: {} (max 32)", unit.len());
    }
    let mut percentile = [0.0f64; 32];
    percentile[..unit.len()].copy_from_slice(&unit);
    unsafe {
        flatten! {
            AccessorPercentileArray {
                len: unit.len() as u64,
                percentile,
            }
        }
    }
}

================================================
FILE: extension/src/aggregate_builder_tests.rs
================================================

// Tests for `aggregate_builder::aggregate`. This can't be in the
// aggregate_builder crate because it requires too much of postgres to actually
// function
use aggregate_builder::aggregate;
use pgrx::*;

use crate::{palloc::Inner, raw::bytea};

// just about the simplest aggregate `arbitrary()` returns an arbitrary element
// from the input set. We have three versions
//  1. `anything()` tests that the minimal functionality works.
//  2. `cagg_anything()` tests that the config we use for caggs (serialization
//     but not parallel-safe) outputs the expected config.
//  3. `parallel_anything()` tests that the parallel version outputs the expected
//     config.
#[aggregate] impl toolkit_experimental::anything { type State = String; fn transition(state: Option, #[sql_type("text")] value: String) -> Option { state.or(Some(value)) } fn finally(state: Option<&mut State>) -> Option { state.as_deref().cloned() } } #[aggregate] impl toolkit_experimental::cagg_anything { type State = String; fn transition(state: Option, #[sql_type("text")] value: String) -> Option { state.or(Some(value)) } fn finally(state: Option<&mut State>) -> Option { state.as_deref().cloned() } fn serialize(state: &State) -> bytea { crate::do_serialize!(state) } fn deserialize(bytes: bytea) -> State { crate::do_deserialize!(bytes, State) } fn combine(a: Option<&State>, b: Option<&State>) -> Option { a.or(b).cloned() } } #[aggregate] impl toolkit_experimental::parallel_anything { type State = String; fn transition(state: Option, #[sql_type("text")] value: String) -> Option { state.or(Some(value)) } fn finally(state: Option<&mut State>) -> Option { state.as_deref().cloned() } const PARALLEL_SAFE: bool = true; fn serialize(state: &State) -> bytea { crate::do_serialize!(state) } fn deserialize(bytes: bytea) -> State { crate::do_deserialize!(bytes, State) } fn combine(a: Option<&State>, b: Option<&State>) -> Option { a.or(b).cloned() } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_anything_in_experimental_and_returns_first() { Spi::connect_mut(|client| { let output = client .update( "SELECT toolkit_experimental.anything(val) \ FROM (VALUES ('foo'), ('bar'), ('baz')) as v(val)", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(output.as_deref(), Some("foo")); }) } #[pg_test] fn test_anything_has_correct_fn_names_and_def() { Spi::connect_mut(|client| { let spec = get_aggregate_spec(client, "anything"); // output is // fn kind (`a`), volatility, parallel-safety, num args, final fn modify (is this right?) 
// transition type (`internal`) // output type // transition fn name, // final fn name, // serialize fn name or - if none, // deserialize fn name or - if none, assert_eq!( spec, "(\ a,i,u,1,r,\ internal,\ text,\ toolkit_experimental.anything_transition_fn_outer,\ toolkit_experimental.anything_finally_fn_outer,\ -,\ -,\ -\ )" ); }); } #[pg_test] fn test_cagg_anything_has_correct_fn_names_and_def() { Spi::connect_mut(|client| { let spec = get_aggregate_spec(client, "cagg_anything"); // output is // fn kind (`a`), volatility, parallel-safety, num args, final fn modify (is this right?) // transition type (`internal`) // output type // transition fn name, // final fn name, // serialize fn name or - if none, // deserialize fn name or - if none, assert_eq!( spec, "(\ a,i,u,1,r,\ internal,\ text,\ toolkit_experimental.cagg_anything_transition_fn_outer,\ toolkit_experimental.cagg_anything_finally_fn_outer,\ toolkit_experimental.cagg_anything_serialize_fn_outer,\ toolkit_experimental.cagg_anything_deserialize_fn_outer,\ toolkit_experimental.cagg_anything_combine_fn_outer\ )" ); }); } #[pg_test] fn test_parallel_anything_has_correct_fn_names_and_def() { Spi::connect_mut(|client| { let spec = get_aggregate_spec(client, "parallel_anything"); // output is // fn kind (`a`), volatility, parallel-safety, num args, final fn modify (is this right?) // transition type (`internal`) // output type // transition fn name, // final fn name, // serialize fn name or - if none, // deserialize fn name or - if none, assert_eq!( spec, "(\ a,i,s,1,r,\ internal,\ text,\ toolkit_experimental.parallel_anything_transition_fn_outer,\ toolkit_experimental.parallel_anything_finally_fn_outer,\ toolkit_experimental.parallel_anything_serialize_fn_outer,\ toolkit_experimental.parallel_anything_deserialize_fn_outer,\ toolkit_experimental.parallel_anything_combine_fn_outer\ )" ); }); } // It gets annoying, and segfaulty to handle many arguments from the Spi. 
// For simplicity, we just return a single string representing the tuple // and use string-comparison. fn get_aggregate_spec(client: &mut spi::SpiClient, aggregate_name: &str) -> String { client .update( &format!( r#"SELECT ( prokind, provolatile, proparallel, pronargs, aggfinalmodify, aggtranstype::regtype, prorettype::regtype, aggtransfn, aggfinalfn, aggserialfn, aggdeserialfn, aggcombinefn)::TEXT FROM pg_proc, pg_aggregate WHERE proname = '{aggregate_name}' AND pg_proc.oid = aggfnoid;"# ), None, &[], ) .unwrap() .first() .get_one::() .unwrap() .expect("no aggregate found") } } ================================================ FILE: extension/src/aggregate_utils.rs ================================================ use std::ptr::null_mut; use pgrx::pg_sys; // TODO move to func_utils once there are enough function to warrant one pub unsafe fn get_collation(fcinfo: pg_sys::FunctionCallInfo) -> Option { if (*fcinfo).fncollation == pg_sys::Oid::INVALID { None } else { Some((*fcinfo).fncollation) } } pub fn get_collation_or_default(fcinfo: pg_sys::FunctionCallInfo) -> Option { if fcinfo.is_null() { Some(pg_sys::Oid::from(100)) // TODO: default OID, there should be a constant for this } else { unsafe { get_collation(fcinfo) } } } pub unsafe fn in_aggregate_context T>( fcinfo: pg_sys::FunctionCallInfo, f: F, ) -> T { let mctx = aggregate_mctx(fcinfo).unwrap_or_else(|| pgrx::error!("cannot call as non-aggregate")); crate::palloc::in_memory_context(mctx, f) } pub unsafe fn aggregate_mctx(fcinfo: pg_sys::FunctionCallInfo) -> Option { if fcinfo.is_null() { return Some(pg_sys::CurrentMemoryContext); } let mut mctx = null_mut(); let is_aggregate = pg_sys::AggCheckCallContext(fcinfo, &mut mctx); if is_aggregate == 0 { None } else { debug_assert!(!mctx.is_null()); Some(mctx) } } ================================================ FILE: extension/src/asap.rs ================================================ use asap::*; use pgrx::*; use serde::{Deserialize, Serialize}; use crate::{ 
aggregate_utils::in_aggregate_context,
    palloc::{Inner, Internal, InternalAsValue, ToInternal},
    time_vector,
};
use tspoint::TSPoint;

use crate::time_vector::{Timevector_TSTZ_F64, Timevector_TSTZ_F64Data};

/// Transition state for the `asap_smooth` aggregate: the raw points plus a
/// flag tracking whether they arrived in timestamp order, so the finalizer
/// only sorts when it has to.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct ASAPTransState {
    ts: Vec<TSPoint>,
    sorted: bool,
    resolution: i32,
}

#[pg_extern(immutable, parallel_safe)]
pub fn asap_trans(
    state: Internal,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    resolution: i32,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    asap_trans_internal(unsafe { state.to_inner() }, ts, val, resolution, fcinfo).internal()
}

/// Accumulate one (ts, val) pair into the transition state; rows with a NULL
/// timestamp or value are ignored.
pub fn asap_trans_internal(
    state: Option<Inner<ASAPTransState>>,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    resolution: i32,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<ASAPTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let p = match (ts, val) {
                (_, None) => return state,
                (None, _) => return state,
                (Some(ts), Some(val)) => TSPoint { ts: ts.into(), val },
            };

            match state {
                None => Some(
                    ASAPTransState {
                        ts: vec![p],
                        sorted: true,
                        resolution,
                    }
                    .into(),
                ),
                Some(mut s) => {
                    s.add_point(p);
                    Some(s)
                }
            }
        })
    }
}

impl ASAPTransState {
    /// Append a point, clearing the sorted flag if it goes backwards in time.
    fn add_point(&mut self, point: TSPoint) {
        self.ts.push(point);
        if let Some(window) = self.ts.windows(2).last() {
            if window[0].ts > window[1].ts {
                self.sorted = false
            }
        }
    }
}

#[pg_extern(immutable, parallel_safe)]
fn asap_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Timevector_TSTZ_F64<'static>> {
    asap_final_inner(unsafe { state.to_inner() }, fcinfo)
}

/// Finalizer: sort the points if needed, smooth them with ASAP, and emit an
/// evenly-spaced timevector spanning the original time range.
fn asap_final_inner(
    state: Option<Inner<ASAPTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Timevector_TSTZ_F64<'static>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let state = match state {
                None => return None,
                Some(state) => state.clone(),
            };

            let mut points = state.ts;
            if !state.sorted {
                points.sort_by_key(|p| p.ts);
            }
            // A state is only created with at least one point, so first/last
            // are safe here.
            let start_ts = points.first().unwrap().ts;
            let end_ts = points.last().unwrap().ts;

            let mut values: Vec<f64> = points.iter().map(|p| p.val).collect();
            values = asap_smooth(&values, state.resolution as u32);
            // Re-space the smoothed values evenly over [start_ts, end_ts].
            let interval = if values.len() > 1 {
                (end_ts - start_ts) / (values.len() - 1) as i64
            } else {
                1
            };
            let points: Vec<_> = values
                .into_iter()
                .enumerate()
                .map(|(i, val)| TSPoint {
                    ts: start_ts + i as i64 * interval,
                    val,
                })
                .collect();

            // One null-bitmap byte per 8 points.
            let nulls_len = points.len().div_ceil(8);

            Some(crate::build! {
                Timevector_TSTZ_F64 {
                    num_points: points.len() as u32,
                    flags: time_vector::FLAG_IS_SORTED,
                    internal_padding: [0; 3],
                    points: points.into(),
                    null_val: std::vec::from_elem(0_u8, nulls_len).into(),
                }
            })
        })
    }
}

/// ASAP smoothing applied directly to a timevector.
#[pg_extern(name = "asap_smooth", immutable, parallel_safe)]
pub fn asap_on_timevector(
    mut series: Timevector_TSTZ_F64<'static>,
    resolution: i32,
) -> Option<Timevector_TSTZ_F64<'static>> {
    // TODO: implement this using zero copy (requires sort, find_downsample_interval, and downsample_and_gapfill on Timevector)
    // BUG FIX: this previously read `series.is_sorted()` without the negation,
    // which sorted already-sorted input and left unsorted input unsorted.
    let needs_sort = !series.is_sorted();
    if needs_sort {
        series.points.as_owned().sort_by_key(|p| p.ts);
    }
    let start_ts = series.points.as_slice().first().unwrap().ts;
    let end_ts = series.points.as_slice().last().unwrap().ts;
    let values: Vec<f64> = series.points.as_slice().iter().map(|p| p.val).collect();
    let result = asap_smooth(&values, resolution as u32);

    // Re-space the smoothed values evenly over [start_ts, end_ts].
    let interval = if result.len() > 1 {
        (end_ts - start_ts) / (result.len() - 1) as i64
    } else {
        1
    };

    let points: Vec<_> = result
        .into_iter()
        .enumerate()
        .map(|(i, val)| TSPoint {
            ts: start_ts + i as i64 * interval,
            val,
        })
        .collect();

    // One null-bitmap byte per 8 points.
    let nulls_len = points.len().div_ceil(8);

    Some(crate::build!
{ Timevector_TSTZ_F64 { num_points: points.len() as u32, flags: time_vector::FLAG_IS_SORTED, internal_padding: [0; 3], points: points.into(), null_val: std::vec::from_elem(0_u8, nulls_len).into(), } }) } // Aggregate on only values (assumes aggregation over ordered normalized timestamp) extension_sql!( "\n\ CREATE AGGREGATE asap_smooth(ts TIMESTAMPTZ, value DOUBLE PRECISION, resolution INT)\n\ (\n\ sfunc = asap_trans,\n\ stype = internal,\n\ finalfunc = asap_final\n\ );\n", name = "asap_agg", requires = [asap_trans, asap_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use approx::assert_relative_eq; use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_against_reference() { // Test our ASAP implementation against the reference implementation at http://www.futuredata.io.s3-website-us-west-2.amazonaws.com/asap/ // The sample data is the first 100 points of the second sample data set. Note that the dates are not important for this test. Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); let mut result = client.update( " SELECT value FROM unnest( (SELECT asap_smooth('2020-1-1'::timestamptz + i * '1d'::interval, val, 10) FROM (VALUES (1,1.1),(2,4.4),(3,7.5),(4,8.9),(5,11.7),(6,15),(7,15.3),(8,15.6),(9,13.3),(10,11.1), (11,7.5),(12,5.8),(13,5.6),(14,4.2),(15,4.7),(16,7.2),(17,11.4),(18,15.3),(19,15),(20,16.2), (21,14.4),(22,8.6),(23,5.3),(24,3.3),(25,4.4),(26,3.3),(27,5),(28,8.1),(29,10.8),(30,12.2), (31,13.8),(32,13.3),(33,12.8),(34,9.4),(35,6.9),(36,3.9),(37,1.1),(38,4.2),(39,4.2),(40,8.4), (41,13.4),(42,16.4),(43,16),(44,15.6),(45,14.7),(46,10.2),(47,6.1),(48,1.8),(49,4.2),(50,5), (51,5.1),(52,9.2),(53,13.6),(54,14.9),(55,16.9),(56,16.9),(57,14.4),(58,10.8),(59,4.7),(60,3.6), (61,3.9),(62,2.4),(63,7.1),(64,8.3),(65,12.5),(66,16.4),(67,16.9),(68,16),(69,12.8),(70,9.1), (71,7.2),(72,1.6),(73,1.2),(74,2.3),(75,2.8),(76,7.1),(77,10.3),(78,15.1),(79,16.8),(80,15.7), 
(81,16.6),(82,10.1),(83,8.1),(84,5),(85,4.1),(86,4.7),(87,6.2),(88,8.7),(89,12.4),(90,14), (91,15.3),(92,16.3),(93,15.3),(94,10.9),(95,9.2),(96,3.4),(97,1.9),(98,2.2),(99,6),(100,6.8) ) AS v(i, val) )) s", None, &[]).unwrap(); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 10.39 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 9.29 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 7.54 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 7.8 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 10.34 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 11.01 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 10.54 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 8.01 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 8.99 ); assert_relative_eq!( result.next().unwrap()[1].value::().unwrap().unwrap() as f32, 8.73 ); assert!(result.next().is_none()); }) } #[pg_test] fn test_asap_equivalence() { Spi::connect_mut(|client| { let mut value_result = client.update( " SELECT time::text, value FROM unnest( (SELECT asap_smooth('2020-1-1'::timestamptz + i * '1d'::interval, val, 10) FROM (VALUES (1,1.1),(2,4.4),(3,7.5),(4,8.9),(5,11.7),(6,15),(7,15.3),(8,15.6),(9,13.3),(10,11.1), (11,7.5),(12,5.8),(13,5.6),(14,4.2),(15,4.7),(16,7.2),(17,11.4),(18,15.3),(19,15),(20,16.2), (21,14.4),(22,8.6),(23,5.3),(24,3.3),(25,4.4),(26,3.3),(27,5),(28,8.1),(29,10.8),(30,12.2), (31,13.8),(32,13.3),(33,12.8),(34,9.4),(35,6.9),(36,3.9),(37,1.1),(38,4.2),(39,4.2),(40,8.4), (41,13.4),(42,16.4),(43,16),(44,15.6),(45,14.7),(46,10.2),(47,6.1),(48,1.8),(49,4.2),(50,5), (51,5.1),(52,9.2),(53,13.6),(54,14.9),(55,16.9),(56,16.9),(57,14.4),(58,10.8),(59,4.7),(60,3.6), 
(61,3.9),(62,2.4),(63,7.1),(64,8.3),(65,12.5),(66,16.4),(67,16.9),(68,16),(69,12.8),(70,9.1), (71,7.2),(72,1.6),(73,1.2),(74,2.3),(75,2.8),(76,7.1),(77,10.3),(78,15.1),(79,16.8),(80,15.7), (81,16.6),(82,10.1),(83,8.1),(84,5),(85,4.1),(86,4.7),(87,6.2),(88,8.7),(89,12.4),(90,14), (91,15.3),(92,16.3),(93,15.3),(94,10.9),(95,9.2),(96,3.4),(97,1.9),(98,2.2),(99,6),(100,6.8) ) AS v(i, val) )) s", None, &[]).unwrap(); let mut tvec_result = client.update( " SELECT time::text, value FROM unnest( (SELECT asap_smooth( (SELECT timevector('2020-1-1'::timestamptz + i * '1d'::interval, val) FROM (VALUES (1,1.1),(2,4.4),(3,7.5),(4,8.9),(5,11.7),(6,15),(7,15.3),(8,15.6),(9,13.3),(10,11.1), (11,7.5),(12,5.8),(13,5.6),(14,4.2),(15,4.7),(16,7.2),(17,11.4),(18,15.3),(19,15),(20,16.2), (21,14.4),(22,8.6),(23,5.3),(24,3.3),(25,4.4),(26,3.3),(27,5),(28,8.1),(29,10.8),(30,12.2), (31,13.8),(32,13.3),(33,12.8),(34,9.4),(35,6.9),(36,3.9),(37,1.1),(38,4.2),(39,4.2),(40,8.4), (41,13.4),(42,16.4),(43,16),(44,15.6),(45,14.7),(46,10.2),(47,6.1),(48,1.8),(49,4.2),(50,5), (51,5.1),(52,9.2),(53,13.6),(54,14.9),(55,16.9),(56,16.9),(57,14.4),(58,10.8),(59,4.7),(60,3.6), (61,3.9),(62,2.4),(63,7.1),(64,8.3),(65,12.5),(66,16.4),(67,16.9),(68,16),(69,12.8),(70,9.1), (71,7.2),(72,1.6),(73,1.2),(74,2.3),(75,2.8),(76,7.1),(77,10.3),(78,15.1),(79,16.8),(80,15.7), (81,16.6),(82,10.1),(83,8.1),(84,5),(85,4.1),(86,4.7),(87,6.2),(88,8.7),(89,12.4),(90,14), (91,15.3),(92,16.3),(93,15.3),(94,10.9),(95,9.2),(96,3.4),(97,1.9),(98,2.2),(99,6),(100,6.8) ) AS v(i, val) ), 10) ))", None, &[]).unwrap(); for _ in 0..10 { let v = value_result.next().unwrap(); let t = tvec_result.next().unwrap(); assert_eq!(v[1].value::<&str>(), t[1].value::<&str>()); assert_eq!(v[2].value::().unwrap(), t[2].value::().unwrap()); } assert!(value_result.next().is_none()); assert!(tvec_result.next().is_none()); }) } } ================================================ FILE: extension/src/bin/pgrx_embed.rs 
================================================ // so we can support upgrading pgrx #![allow(unexpected_cfgs)] ::pgrx::pgrx_embed!(); ================================================ FILE: extension/src/candlestick.rs ================================================ use pgrx::*; use serde::{Deserialize, Serialize}; use crate::accessors::{ AccessorClose, AccessorCloseTime, AccessorHigh, AccessorHighTime, AccessorLow, AccessorLowTime, AccessorOpen, AccessorOpenTime, }; use crate::{ aggregate_utils::in_aggregate_context, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, raw::bytea, ron_inout_funcs, }; use tspoint::TSPoint; flat_serialize_macro::flat_serialize! { #[derive(Serialize, Deserialize, Debug, Copy)] enum VolKind { unused_but_required_by_flat_serialize: u64, Missing: 1 {}, Transaction: 2 { vol: f64, vwap: f64 }, } } pg_type! { #[derive(Debug, Copy)] struct Candlestick { open: TSPoint, high: TSPoint, low: TSPoint, close: TSPoint, #[flat_serialize::flatten] volume: VolKind, } } impl Candlestick { pub fn new(ts: i64, open: f64, high: f64, low: f64, close: f64, volume: Option) -> Self { let volume = match volume { None => VolKind::Missing {}, Some(volume) => { let typical = (high + low + close) / 3.0; VolKind::Transaction { vol: volume, vwap: volume * typical, } } }; unsafe { flatten!(Candlestick { open: TSPoint { ts, val: open }, high: TSPoint { ts, val: high }, low: TSPoint { ts, val: low }, close: TSPoint { ts, val: close }, volume, }) } } pub fn from_tick(ts: i64, price: f64, volume: Option) -> Self { Candlestick::new(ts, price, price, price, price, volume) } pub fn add_tick_data(&mut self, ts: i64, price: f64, volume: Option) { if ts < self.open.ts { self.open = TSPoint { ts, val: price }; } if price > self.high.val { self.high = TSPoint { ts, val: price }; } if price < self.low.val { self.low = TSPoint { ts, val: price }; } if ts > self.close.ts { self.close = TSPoint { ts, val: price }; } if let (VolKind::Transaction { vol, vwap }, 
Some(volume)) = (self.volume, volume) { self.volume = VolKind::Transaction { vol: vol + volume, vwap: vwap + volume * price, }; } else { self.volume = VolKind::Missing {}; }; } pub fn combine(&mut self, candlestick: &Candlestick) { if candlestick.open.ts < self.open.ts { self.open = candlestick.open; } if candlestick.high.val > self.high.val { self.high = candlestick.high; } if candlestick.low.val < self.low.val { self.low = candlestick.low; } if candlestick.close.ts > self.close.ts { self.close = candlestick.close; } if let ( VolKind::Transaction { vol: vol1, vwap: vwap1, }, VolKind::Transaction { vol: vol2, vwap: vwap2, }, ) = (self.volume, candlestick.volume) { self.volume = VolKind::Transaction { vol: vol1 + vol2, vwap: vwap1 + vwap2, }; } else { self.volume = VolKind::Missing {}; }; } pub fn open(&self) -> f64 { self.open.val } pub fn high(&self) -> f64 { self.high.val } pub fn low(&self) -> f64 { self.low.val } pub fn close(&self) -> f64 { self.close.val } pub fn open_time(&self) -> i64 { self.open.ts } pub fn high_time(&self) -> i64 { self.high.ts } pub fn low_time(&self) -> i64 { self.low.ts } pub fn close_time(&self) -> i64 { self.close.ts } pub fn volume(&self) -> Option { match self.volume { VolKind::Transaction { vol, .. 
} => Some(vol),
            VolKind::Missing {} => None,
        }
    }

    /// Volume-weighted average price: VWAP numerator over total volume,
    /// guarded against zero or non-finite degenerate values.
    pub fn vwap(&self) -> Option<f64> {
        match self.volume {
            VolKind::Transaction { vol, vwap } => {
                (vol > 0.0 && vwap.is_finite()).then(|| vwap / vol)
            }
            VolKind::Missing {} => None,
        }
    }
}

ron_inout_funcs!(Candlestick);

/// SQL constructor: NULL timestamp or any NULL OHLC component yields NULL.
#[pg_extern(immutable, parallel_safe)]
pub fn candlestick(
    ts: Option<crate::raw::TimestampTz>,
    open: Option<f64>,
    high: Option<f64>,
    low: Option<f64>,
    close: Option<f64>,
    volume: Option<f64>,
) -> Option<Candlestick<'static>> {
    let ts = ts?;
    Some(Candlestick::new(
        ts.into(),
        open?,
        high?,
        low?,
        close?,
        volume,
    ))
}

#[pg_extern(immutable, parallel_safe)]
pub fn tick_data_no_vol_transition(
    state: Internal,
    ts: Option<crate::raw::TimestampTz>,
    price: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    tick_data_transition_inner(unsafe { state.to_inner() }, ts, price, None, fcinfo).internal()
}

#[pg_extern(immutable, parallel_safe)]
pub fn tick_data_transition(
    state: Internal,
    ts: Option<crate::raw::TimestampTz>,
    price: Option<f64>,
    volume: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    tick_data_transition_inner(unsafe { state.to_inner() }, ts, price, volume, fcinfo).internal()
}

/// Shared transition: ignore rows with NULL ts/price, otherwise start or
/// extend the running candlestick.
pub fn tick_data_transition_inner(
    state: Option<Inner<Candlestick<'static>>>,
    ts: Option<crate::raw::TimestampTz>,
    price: Option<f64>,
    volume: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<Candlestick<'static>>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let (ts, price) = match (ts, price) {
                (Some(ts), Some(price)) => (ts, price),
                _ => return state,
            };
            match state {
                None => Some(Candlestick::from_tick(ts.into(), price, volume).into()),
                Some(mut cs) => {
                    cs.add_tick_data(ts.into(), price, volume);
                    Some(cs)
                }
            }
        })
    }
}

#[pg_extern(immutable, parallel_safe)]
pub fn candlestick_rollup_trans(
    state: Internal,
    value: Option<Candlestick<'static>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    candlestick_rollup_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

/// Rollup transition: merge incoming candlesticks into the running state.
pub fn candlestick_rollup_trans_inner(
    state: Option<Inner<Candlestick<'static>>>,
    value: Option<Candlestick<'static>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<Candlestick<'static>>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, value) {
            (state, None) => state,
            (None, Some(value)) =>
Some(value.into()),
            (Some(state), Some(value)) => {
                let mut state = *state;
                state.combine(&value);
                Some(state.into())
            }
        })
    }
}

#[pg_extern(immutable, parallel_safe)]
pub fn candlestick_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Candlestick<'static>> {
    unsafe { candlestick_final_inner(state.to_inner(), fcinfo) }
}

/// Finalizer: copy the accumulated candlestick out of the transition state.
pub fn candlestick_final_inner(
    state: Option<Inner<Candlestick<'static>>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Candlestick<'static>> {
    // `Candlestick` is `Copy`, so a plain deref-copy suffices here.
    unsafe { in_aggregate_context(fcinfo, || state.map(|state| *state)) }
}

#[pg_extern(immutable, parallel_safe)]
pub fn candlestick_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { candlestick_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

/// Parallel combine: merge two partial candlestick states.
pub fn candlestick_combine_inner(
    state1: Option<Inner<Candlestick<'static>>>,
    state2: Option<Inner<Candlestick<'static>>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<Candlestick<'static>>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state1, state2) {
            (None, None) => None,
            (None, Some(only)) | (Some(only), None) => Some((*only).into()),
            (Some(a), Some(b)) => {
                let (mut a, b) = (*a, *b);
                a.combine(&b);
                Some(a.into())
            }
        })
    }
}

#[pg_extern(immutable, parallel_safe, strict)]
pub fn candlestick_serialize(mut state: Internal) -> bytea {
    let cs: &mut Candlestick = unsafe { state.get_mut().unwrap() };
    let ser = &**cs;
    crate::do_serialize!(ser)
}

#[pg_extern(immutable, parallel_safe, strict)]
pub fn candlestick_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    candlestick_deserialize_inner(bytes).internal()
}

pub fn candlestick_deserialize_inner(bytes: bytea) -> Inner<Candlestick<'static>> {
    let de: CandlestickData = crate::do_deserialize!(bytes, CandlestickData);
    let cs: Candlestick = de.into();
    cs.into()
}

extension_sql!(
    "\n\
    CREATE AGGREGATE candlestick_agg( \n\
        ts TIMESTAMPTZ,\n\
        price DOUBLE PRECISION,\n\
        volume DOUBLE PRECISION\n\
    )\n\
    (\n\
        sfunc = tick_data_transition,\n\
        stype = internal,\n\
        finalfunc = candlestick_final,\n\
        combinefunc =
candlestick_combine,\n\
        serialfunc = candlestick_serialize,\n\
        deserialfunc = candlestick_deserialize,\n\
        parallel = safe\n\
    );\n",
    name = "candlestick_agg",
    requires = [
        tick_data_transition,
        candlestick_final,
        candlestick_combine,
        candlestick_serialize,
        candlestick_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE rollup( candlestick Candlestick)\n\
    (\n\
        sfunc = candlestick_rollup_trans,\n\
        stype = internal,\n\
        finalfunc = candlestick_final,\n\
        combinefunc = candlestick_combine,\n\
        serialfunc = candlestick_serialize,\n\
        deserialfunc = candlestick_deserialize,\n\
        parallel = safe\n\
    );\n",
    name = "candlestick_rollup",
    requires = [
        candlestick_rollup_trans,
        candlestick_final,
        candlestick_combine,
        candlestick_serialize,
        candlestick_deserialize
    ],
);

// Arrow (`->`) accessor for `open()`.
// BUG FIX: this was declared `#[pg_extern]`, but `#[opname(->)]` is only
// meaningful on `#[pg_operator]`; every sibling arrow accessor
// (arrow_high, arrow_low, arrow_close, ...) uses `#[pg_operator]`.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_open(candlestick: Option<Candlestick>, _accessor: AccessorOpen) -> Option<f64> {
    candlestick.map(|cs| cs.open())
}

#[pg_extern(immutable, parallel_safe)]
pub fn open(candlestick: Option<Candlestick>) -> Option<f64> {
    candlestick.map(|cs| cs.open())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_high(candlestick: Option<Candlestick>, _accessor: AccessorHigh) -> Option<f64> {
    candlestick.map(|cs| cs.high())
}

#[pg_extern(immutable, parallel_safe)]
pub fn high(candlestick: Option<Candlestick>) -> Option<f64> {
    candlestick.map(|cs| cs.high())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_low(candlestick: Option<Candlestick>, _accessor: AccessorLow) -> Option<f64> {
    candlestick.map(|cs| cs.low())
}

#[pg_extern(immutable, parallel_safe)]
pub fn low(candlestick: Option<Candlestick>) -> Option<f64> {
    candlestick.map(|cs| cs.low())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_close(candlestick: Option<Candlestick>, _accessor: AccessorClose) -> Option<f64> {
    candlestick.map(|cs| cs.close())
}

#[pg_extern(immutable, parallel_safe)]
pub fn close(candlestick: Option<Candlestick>) -> Option<f64> {
    candlestick.map(|cs| cs.close())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_open_time(
    candlestick: Option<Candlestick>,
_accessor: AccessorOpenTime,
) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.open_time().into())
}

#[pg_extern(immutable, parallel_safe)]
pub fn open_time(candlestick: Option<Candlestick>) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.open_time().into())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_high_time(
    candlestick: Option<Candlestick>,
    _accessor: AccessorHighTime,
) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.high_time().into())
}

#[pg_extern(immutable, parallel_safe)]
pub fn high_time(candlestick: Option<Candlestick>) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.high_time().into())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_low_time(
    candlestick: Option<Candlestick>,
    _accessor: AccessorLowTime,
) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.low_time().into())
}

#[pg_extern(immutable, parallel_safe)]
pub fn low_time(candlestick: Option<Candlestick>) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.low_time().into())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_close_time(
    candlestick: Option<Candlestick>,
    _accessor: AccessorCloseTime,
) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.close_time().into())
}

#[pg_extern(immutable, parallel_safe)]
pub fn close_time(candlestick: Option<Candlestick>) -> Option<crate::raw::TimestampTz> {
    candlestick.map(|cs| cs.close_time().into())
}

/// Total traded volume; NULL when any tick lacked volume data.
#[pg_extern(immutable, parallel_safe)]
pub fn volume(candlestick: Option<Candlestick>) -> Option<f64> {
    candlestick.and_then(|cs| cs.volume())
}

/// Volume-weighted average price; NULL when unavailable.
#[pg_extern(immutable, parallel_safe)]
pub fn vwap(candlestick: Option<Candlestick>) -> Option<f64> {
    candlestick.and_then(|cs| cs.vwap())
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use std::ptr;

    use super::*;
    use pgrx_macros::pg_test;

    // Fetch a single value from a one-row SPI result.
    macro_rules! select_one {
        ($client:expr, $stmt:expr, $type:ty) => {
            $client
                .update($stmt, None, &[])
                .unwrap()
                .first()
                .get_one::<$type>()
                .unwrap()
        };
    }

    macro_rules!
select_two { ($client:expr, $stmt:expr, $type1:ty, $type2:ty) => { $client .update($stmt, None, &[]) .unwrap() .first() .get_two::<$type1, $type2>() .unwrap() }; } #[pg_test] fn candlestick_single_point() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); let stmt = r#"SELECT candlestick(ts, open, high, low, close, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 0.0, 0.0, 0.0, 1.0) ) AS v(ts, open, high, low, close, volume)"#; let output = select_one!(client, stmt, &str); let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:0),\ high:(ts:\"2022-08-01 00:00:00+00\",val:0),\ low:(ts:\"2022-08-01 00:00:00+00\",val:0),\ close:(ts:\"2022-08-01 00:00:00+00\",val:0),\ volume:Transaction(vol:1,vwap:0)\ )"; assert_eq!(expected, output.unwrap()); }); } #[pg_test] fn candlestick_agg_single_point() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); let stmt = r#"SELECT candlestick_agg(ts, price, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 1.0) ) AS v(ts, price, volume)"#; let output = select_one!(client, stmt, &str); let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:0),\ high:(ts:\"2022-08-01 00:00:00+00\",val:0),\ low:(ts:\"2022-08-01 00:00:00+00\",val:0),\ close:(ts:\"2022-08-01 00:00:00+00\",val:0),\ volume:Transaction(vol:1,vwap:0)\ )"; assert_eq!(expected, output.unwrap()); }); } #[pg_test] fn candlestick_accessors() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); for ohlc in ["open", "high", "low", "close"] { let stmt = format!( r#"SELECT {ohlc}(candlestick), {ohlc}_time(candlestick)::text FROM ( SELECT candlestick(ts, open, high, low, close, volume) FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 0.0, 0.0, 0.0, 1.0) ) AS v(ts, open, high, low, close, volume) ) AS v(candlestick)"# ); let (val, ts) = select_two!(client, &stmt, f64, &str); 
assert_eq!(0.0, val.unwrap());
assert_eq!("2022-08-01 00:00:00+00", ts.unwrap());
}

// testing arrow operators
// Same accessors as the loop above, but invoked through the `->` operator
// form (e.g. `candlestick->open()`); results must match the plain calls.
for ohlc in ["open", "high", "low", "close"] {
    let stmt = format!(
        r#"SELECT candlestick->{ohlc}(), (candlestick->{ohlc}_time())::text FROM ( SELECT candlestick(ts, open, high, low, close, volume) FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 0.0, 0.0, 0.0, 1.0) ) AS v(ts, open, high, low, close, volume) ) AS v(candlestick)"#
    );
    let (val, ts) = select_two!(client, &stmt, f64, &str);
    assert_eq!(0.0, val.unwrap());
    assert_eq!("2022-08-01 00:00:00+00", ts.unwrap());
}

// volume()/vwap() accessors: every price in the fixture is 0.0 so the
// volume-weighted average price is 0 while the volume passes through as-is.
let stmt = r#"SELECT volume(candlestick), vwap(candlestick) FROM ( SELECT candlestick(ts, open, high, low, close, volume) FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 0.0, 0.0, 0.0, 1.0) ) AS v(ts, open, high, low, close, volume) ) AS v(candlestick)"#;
let (vol, vwap) = select_two!(client, stmt, f64, f64);
assert_eq!(1.0, vol.unwrap());
assert_eq!(0.0, vwap.unwrap());
});
}

// Same accessor checks as above, but the candlestick is built by the
// `candlestick_agg` aggregate from a raw (ts, price, volume) tick instead of
// from pre-computed OHLC values.
#[pg_test]
fn candlestick_agg_accessors() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        for ohlc in ["open", "high", "low", "close"] {
            let stmt = format!(
                r#"SELECT {ohlc}(candlestick), {ohlc}_time(candlestick)::text FROM ( SELECT candlestick_agg(ts, price, volume) FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 1.0) ) AS v(ts, price, volume) ) AS v(candlestick)"#
            );
            let (val, ts) = select_two!(client, &stmt, f64, &str);
            assert_eq!(0.0, val.unwrap());
            assert_eq!("2022-08-01 00:00:00+00", ts.unwrap());
        }

        let stmt = r#"SELECT volume(candlestick), vwap(candlestick) FROM ( SELECT candlestick_agg(ts, price, volume) FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 1.0) ) AS v(ts, price, volume) ) AS v(candlestick)"#;
        let (vol, vwap) = select_two!(client, stmt, f64, f64);
        assert_eq!(1.0, vol.unwrap());
        assert_eq!(0.0, vwap.unwrap());
    });
}

// Boundary-value test: the aggregate must round-trip the extreme legal
// timestamptz values and f64::MAX/f64::MIN prices through its text form.
#[pg_test]
fn candlestick_agg_extreme_values() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        // timestamptz low and high val according to https://www.postgresql.org/docs/14/datatype-datetime.html
        for extreme_time in &["4713-01-01 00:00:00+00 BC", "294276-12-31 23:59:59+00"] {
            let stmt = format!(
                r#"SELECT candlestick_agg(ts, price, volume)::text FROM (VALUES ('{extreme_time}'::timestamptz, 1.0, 1.0)) AS v(ts, price, volume)"#
            );
            let output = select_one!(client, &stmt, &str);
            let expected = format!(
                "(\ version:1,\ open:(ts:\"{extreme_time}\",val:1),\ high:(ts:\"{extreme_time}\",val:1),\ low:(ts:\"{extreme_time}\",val:1),\ close:(ts:\"{extreme_time}\",val:1),\ volume:Transaction(vol:1,vwap:1)\ )"
            );
            assert_eq!(expected, output.unwrap());
        }

        for extreme_price in &[f64::MAX, f64::MIN] {
            let stmt = format!(
                r#"SELECT candlestick_agg(ts, price, volume)::text FROM (VALUES ('2022-08-01 00:00:00+00'::timestamptz, {extreme_price}, 1.0)) AS v(ts, price, volume)"#
            );
            let output = select_one!(client, &stmt, &str);
            // NOTE(review): the expected vwap is the price summed three times —
            // presumably mirroring the engine's accumulation for a single tick;
            // confirm against the candlestick vwap implementation.
            let expected = format!(
                "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:{extreme_price}),\ high:(ts:\"2022-08-01 00:00:00+00\",val:{extreme_price}),\ low:(ts:\"2022-08-01 00:00:00+00\",val:{extreme_price}),\ close:(ts:\"2022-08-01 00:00:00+00\",val:{extreme_price}),\ volume:Transaction(vol:1,vwap:{})\ )",
                (extreme_price + extreme_price + extreme_price)
            );
            assert_eq!(expected, output.unwrap());
        }
    });
}

// A NULL in any argument of the candlestick() constructor must make the
// whole constructed value NULL.
#[pg_test]
fn candlestick_null_inputs() {
    Spi::connect_mut(|client| {
        for (t, o, h, l, c, v) in &[
            ("NULL", "NULL", "NULL", "NULL", "NULL", "NULL"),
            ("NULL", "1.0", "1.0", "1.0", "1.0", "1.0"),
            ("now()", "NULL", "1.0", "1.0", "1.0", "1.0"),
            ("now()", "1.0", "NULL", "1.0", "1.0", "1.0"),
            ("now()", "1.0", "1.0", "NULL", "1.0", "1.0"),
            ("now()", "1.0", "1.0", "1.0", "NULL", "1.0"),
        ] {
            let stmt = format!("SELECT candlestick({t}, {o}, {h}, {l}, {c}, {v})::TEXT");
            let output = select_one!(client, &stmt, String);
            assert_eq!(output, None);
        }
    });
}

// candlestick_agg: NULL ts or price yields NULL, but a NULL volume is
// accepted and produces a summary whose volume component is `Missing()`.
#[pg_test]
fn candlestick_agg_null_inputs() {
    Spi::connect_mut(|client| {
        for (ts, price, vol) in &[
            ("NULL", "NULL", "NULL"),
            ("NULL", "1.0", "1.0"),
            ("now()", "NULL", "1.0"),
        ] {
            let stmt = format!("SELECT candlestick_agg({ts}, {price}, {vol})::text");
            let output = select_one!(client, &stmt, String);
            assert_eq!(output, None);
        }

        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:1),\ high:(ts:\"2022-08-01 00:00:00+00\",val:1),\ low:(ts:\"2022-08-01 00:00:00+00\",val:1),\ close:(ts:\"2022-08-01 00:00:00+00\",val:1),\ volume:Missing()\ )";
        let output = select_one!(
            client,
            "SELECT candlestick_agg(ts, price, vol)::TEXT FROM (VALUES('2022-08-01 00:00:00+00'::timestamptz, 1.0, NULL::double precision)) AS v(ts, price, vol)",
            String
        ).unwrap();
        assert_eq!(expected, output);
    });
}

// candlestick() used as a per-row constructor (no grouping): each input row
// becomes its own candlestick value; vwap for a single row is close to the
// volume-weighted midpoint of the row's prices (see expected literals).
#[pg_test]
fn candlestick_as_constructor() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let stmt = r#"SELECT candlestick(ts, open, high, low, close, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 0.0, 0.0, 0.0, 1.0), ('2022-08-02 00:00:00+00'::timestamptz, 9.0, 12.0, 3.0, 6.0, 1.0) ) AS v(ts, open, high, low, close, volume)"#;

        let mut candlesticks = client.update(stmt, None, &[]).unwrap();

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:0),\ high:(ts:\"2022-08-01 00:00:00+00\",val:0),\ low:(ts:\"2022-08-01 00:00:00+00\",val:0),\ close:(ts:\"2022-08-01 00:00:00+00\",val:0),\ volume:Transaction(vol:1,vwap:0)\ )";
        assert_eq!(
            Some(expected),
            candlesticks.next().unwrap()[1].value().unwrap()
        );

        let expected = "(\ version:1,\ open:(ts:\"2022-08-02 00:00:00+00\",val:9),\ high:(ts:\"2022-08-02 00:00:00+00\",val:12),\ low:(ts:\"2022-08-02 00:00:00+00\",val:3),\ close:(ts:\"2022-08-02 00:00:00+00\",val:6),\ volume:Transaction(vol:1,vwap:7)\ )";
        assert_eq!(
            Some(expected),
            candlesticks.next().unwrap()[1].value().unwrap()
        );
    });
}

// A day of ticks at a constant price: open/high/low keep the first
// timestamp, close takes the last, and vwap stays 0.
#[pg_test]
fn candlestick_agg_constant() {
    Spi::connect_mut(|client| {
client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

let stmt = r#"SELECT date_trunc('day', ts)::text, candlestick_agg(ts, price, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 1.0), ('2022-08-01 06:00:00+00'::timestamptz, 0.0, 1.0), ('2022-08-01 12:00:00+00'::timestamptz, 0.0, 1.0), ('2022-08-01 18:00:00+00'::timestamptz, 0.0, 1.0), ('2022-08-01 23:59:59+00'::timestamptz, 0.0, 1.0) ) AS v(ts, price, volume) GROUP BY 1"#;

let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:0),\ high:(ts:\"2022-08-01 00:00:00+00\",val:0),\ low:(ts:\"2022-08-01 00:00:00+00\",val:0),\ close:(ts:\"2022-08-01 23:59:59+00\",val:0),\ volume:Transaction(vol:5,vwap:0)\ )";

let (_, output) = select_two!(client, stmt, &str, &str);
assert_eq!(expected, output.unwrap());
});
}

// Monotonically rising prices: high/close land on the last tick,
// open/low on the first; vwap accumulates 1+2+3+4+5 = 15 at volume 1 each.
#[pg_test]
fn candlestick_agg_strictly_increasing() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let stmt = r#"SELECT date_trunc('day', ts)::text, candlestick_agg(ts, price, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 1.0, 1.0), ('2022-08-01 06:00:00+00'::timestamptz, 2.0, 1.0), ('2022-08-01 12:00:00+00'::timestamptz, 3.0, 1.0), ('2022-08-01 18:00:00+00'::timestamptz, 4.0, 1.0), ('2022-08-01 23:59:59+00'::timestamptz, 5.0, 1.0) ) AS v(ts, price, volume) GROUP BY 1"#;

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:1),\ high:(ts:\"2022-08-01 23:59:59+00\",val:5),\ low:(ts:\"2022-08-01 00:00:00+00\",val:1),\ close:(ts:\"2022-08-01 23:59:59+00\",val:5),\ volume:Transaction(vol:5,vwap:15)\ )";

        let (_, output) = select_two!(client, stmt, &str, &str);
        assert_eq!(expected, output.unwrap());
    });
}

// Mirror of the increasing case: high stays on the first tick,
// low/close move to the last one.
#[pg_test]
fn candlestick_agg_strictly_decreasing() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let stmt = r#"SELECT date_trunc('day', ts)::text, candlestick_agg(ts, price, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 5.0, 1.0), ('2022-08-01 06:00:00+00'::timestamptz, 4.0, 1.0), ('2022-08-01 12:00:00+00'::timestamptz, 3.0, 1.0), ('2022-08-01 18:00:00+00'::timestamptz, 2.0, 1.0), ('2022-08-01 23:59:59+00'::timestamptz, 1.0, 1.0) ) AS v(ts, price, volume) GROUP BY 1"#;

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:5),\ high:(ts:\"2022-08-01 00:00:00+00\",val:5),\ low:(ts:\"2022-08-01 23:59:59+00\",val:1),\ close:(ts:\"2022-08-01 23:59:59+00\",val:1),\ volume:Transaction(vol:5,vwap:15)\ )";

        let (_, output) = select_two!(client, stmt, &str, &str);
        assert_eq!(expected, output.unwrap());
    });
}

// Prices that bounce around: high/low must lock onto the interior extremes
// (12.0 at 12:00, 1.0 at 10:00). The 12 prices sum to 78, matching the
// expected vwap accumulator at unit volume per tick.
#[pg_test]
fn candlestick_agg_oscillating() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let stmt = r#"SELECT date_trunc('day', ts)::text, candlestick_agg(ts, price, volume)::text FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 3.0, 1.0), ('2022-08-01 02:00:00+00'::timestamptz, 4.0, 1.0), ('2022-08-01 04:00:00+00'::timestamptz, 11.0, 1.0), ('2022-08-01 06:00:00+00'::timestamptz, 5.0, 1.0), ('2022-08-01 08:00:00+00'::timestamptz, 2.0, 1.0), ('2022-08-01 10:00:00+00'::timestamptz, 1.0, 1.0), ('2022-08-01 12:00:00+00'::timestamptz, 12.0, 1.0), ('2022-08-01 14:00:00+00'::timestamptz, 9.0, 1.0), ('2022-08-01 16:00:00+00'::timestamptz, 10.0, 1.0), ('2022-08-01 18:00:00+00'::timestamptz, 7.0, 1.0), ('2022-08-01 20:00:00+00'::timestamptz, 6.0, 1.0), ('2022-08-01 22:00:00+00'::timestamptz, 8.0, 1.0) ) AS v(ts, price, volume) GROUP BY 1"#;

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:3),\ high:(ts:\"2022-08-01 12:00:00+00\",val:12),\ low:(ts:\"2022-08-01 10:00:00+00\",val:1),\ close:(ts:\"2022-08-01 22:00:00+00\",val:8),\ volume:Transaction(vol:12,vwap:78)\ )";

        let (_, output) = select_two!(client, stmt, &str, &str);
        assert_eq!(expected, output.unwrap());
    });
}

// rollup() over per-row candlesticks: the combined summary must span both
// days, keeping the earliest open and the latest close/high.
#[pg_test]
fn candlestick_rollup() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let stmt = r#"WITH t AS ( SELECT candlestick(ts, open, high, low, close, volume) AS candlestick FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 4.0, 0.0, 4.0, 5.0), ('2022-08-02 00:00:00+00'::timestamptz, 5.0, 8.0, 5.0, 8.0, 4.0) ) AS v(ts, open, high, low, close, volume) ) SELECT rollup(candlestick)::text FROM t"#;

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:0),\ high:(ts:\"2022-08-02 00:00:00+00\",val:8),\ low:(ts:\"2022-08-01 00:00:00+00\",val:0),\ close:(ts:\"2022-08-02 00:00:00+00\",val:8),\ volume:Transaction(vol:9,vwap:41.33333333333333)\ )";

        let output = select_one!(client, stmt, &str);
        assert_eq!(expected, output.unwrap());
    });
}

// rollup() over candlestick_agg partials (daily candles re-rolled into a
// monthly one); prices 0..=8 sum to 36, matching the expected vwap field.
#[pg_test]
fn candlestick_agg_rollup() {
    Spi::connect_mut(|client| {
        client.update("SET timezone TO 'UTC'", None, &[]).unwrap();

        let stmt = r#"WITH t AS ( SELECT date_trunc('day', ts) AS date, candlestick_agg(ts, price, volume) AS candlestick FROM ( VALUES ('2022-08-01 00:00:00+00'::timestamptz, 0.0, 1.0), ('2022-08-01 06:00:00+00'::timestamptz, 1.0, 1.0), ('2022-08-01 12:00:00+00'::timestamptz, 2.0, 1.0), ('2022-08-01 18:00:00+00'::timestamptz, 3.0, 1.0), ('2022-08-01 23:59:59+00'::timestamptz, 4.0, 1.0), ('2022-08-02 06:00:00+00'::timestamptz, 5.0, 1.0), ('2022-08-02 12:00:00+00'::timestamptz, 6.0, 1.0), ('2022-08-02 18:00:00+00'::timestamptz, 7.0, 1.0), ('2022-08-02 23:59:59+00'::timestamptz, 8.0, 1.0) ) AS v(ts, price, volume) GROUP BY 1 ) SELECT date_trunc('month', date)::text, rollup(candlestick)::text FROM t GROUP BY 1"#;

        let expected = "(\ version:1,\ open:(ts:\"2022-08-01 00:00:00+00\",val:0),\ high:(ts:\"2022-08-02 23:59:59+00\",val:8),\ low:(ts:\"2022-08-01 00:00:00+00\",val:0),\ close:(ts:\"2022-08-02 23:59:59+00\",val:8),\ volume:Transaction(vol:9,vwap:36)\ )";

        let (_, output) = select_two!(client, stmt, &str, &str);
        assert_eq!(expected, output.unwrap());
    });
}

// Drives the transition function directly (no SPI) and pins the exact
// serialized byte image of the resulting state.
#[pg_test]
fn candlestick_byte_io() {
    let state = tick_data_transition_inner(
        None,
        Some(100.into()),
        Some(10.0),
        Some(1.0),
        ptr::null_mut(),
    );
let state = tick_data_transition_inner(
    state,
    Some(200.into()),
    Some(1.0),
    Some(2.0),
    ptr::null_mut(),
);
let output_buffer = state.unwrap().to_pg_bytes();
// Golden little-endian byte image of the serialized two-tick state; this
// guards the on-disk/wire format against accidental change.
let expected = [
    128, 1, 0, 0, 1, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 64, 100, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 64, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240,
    63, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 240, 63, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 40, 64,
];
assert_eq!(*output_buffer, expected);
}
}


================================================
FILE: extension/src/counter_agg/accessors.rs
================================================
use pgrx::*;

use crate::{
    counter_agg::{CounterSummary, CounterSummaryData, MetricSummary},
    datum_utils::interval_to_ms,
    pg_type, ron_inout_funcs,
};

use tspoint::TSPoint;

pg_type! {
    struct CounterInterpolatedRateAccessor {
        timestamp : i64,
        interval : i64,
        prev : CounterSummaryData,
        next : CounterSummaryData,
        flags : u64,
    }
}

ron_inout_funcs!(CounterInterpolatedRateAccessor);

/// Builds the accessor value consumed by `counter_agg -> interpolated_rate()`.
///
/// Missing neighbors are stored as zeroed placeholder summaries; the `flags`
/// bitfield (bit 0 = `prev` present, bit 1 = `next` present) records which
/// neighbors were really supplied so the arrow operator can reconstruct the
/// original `Option`s.
#[pg_extern(immutable, parallel_safe, name = "interpolated_rate")]
fn counter_interpolated_rate_accessor(
    start: crate::raw::TimestampTz,
    duration: crate::raw::Interval,
    prev: Option<CounterSummary>,
    next: Option<CounterSummary>,
) -> CounterInterpolatedRateAccessor {
    // Placeholder summary used when a neighbor is absent.
    fn empty_summary() -> Option<CounterSummary> {
        let tmp = TSPoint { ts: 0, val: 0.0 };
        let tmp = MetricSummary::new(&tmp, None);
        let tmp = CounterSummary::from_internal_counter_summary(tmp);
        Some(tmp)
    }
    let flags = u64::from(prev.is_some()) + if next.is_some() { 2 } else { 0 };
    let prev = prev.or_else(empty_summary).unwrap().0;
    let next = next.or_else(empty_summary).unwrap().0;
    let interval = interval_to_ms(&start, &duration);
    crate::build! {
        CounterInterpolatedRateAccessor {
            timestamp : start.into(),
            interval,
            prev,
            next,
            flags,
        }
    }
}

pg_type! {
    struct CounterInterpolatedDeltaAccessor {
        timestamp : i64,
        interval : i64,
        prev : CounterSummaryData,
        next : CounterSummaryData,
        flags : u64,
    }
}

ron_inout_funcs!(CounterInterpolatedDeltaAccessor);

/// Builds the accessor value consumed by `counter_agg -> interpolated_delta()`.
///
/// Same encoding as [`counter_interpolated_rate_accessor`]: placeholder
/// summaries for absent neighbors, with presence tracked in `flags`.
#[pg_extern(immutable, parallel_safe, name = "interpolated_delta")]
fn counter_interpolated_delta_accessor(
    start: crate::raw::TimestampTz,
    duration: crate::raw::Interval,
    prev: Option<CounterSummary>,
    next: Option<CounterSummary>,
) -> CounterInterpolatedDeltaAccessor {
    // Placeholder summary used when a neighbor is absent.
    fn empty_summary() -> Option<CounterSummary> {
        let tmp = TSPoint { ts: 0, val: 0.0 };
        let tmp = MetricSummary::new(&tmp, None);
        let tmp = CounterSummary::from_internal_counter_summary(tmp);
        Some(tmp)
    }
    let flags = u64::from(prev.is_some()) + if next.is_some() { 2 } else { 0 };
    let prev = prev.or_else(empty_summary).unwrap().0;
    let next = next.or_else(empty_summary).unwrap().0;
    let interval = interval_to_ms(&start, &duration);
    crate::build! {
        CounterInterpolatedDeltaAccessor {
            timestamp : start.into(),
            interval,
            prev,
            next,
            flags,
        }
    }
}


================================================
FILE: extension/src/counter_agg.rs
================================================
use serde::{Deserialize, Serialize};

use pgrx::*;

use crate::{
    accessors::{
        AccessorCorr, AccessorCounterZeroTime, AccessorDelta, AccessorExtrapolatedDelta,
        AccessorExtrapolatedRate, AccessorFirstTime, AccessorFirstVal, AccessorIdeltaLeft,
        AccessorIdeltaRight, AccessorIntercept, AccessorIrateLeft, AccessorIrateRight,
        AccessorLastTime, AccessorLastVal, AccessorNumChanges, AccessorNumElements,
        AccessorNumResets, AccessorRate, AccessorSlope, AccessorTimeDelta, AccessorWithBounds,
    },
    aggregate_utils::in_aggregate_context,
    flatten,
    palloc::{Inner, Internal, InternalAsValue, ToInternal},
    pg_type,
    range::*,
    ron_inout_funcs,
};

use tspoint::TSPoint;

use counter_agg::{range::I64Range, CounterSummaryBuilder, MetricSummary};
use stats_agg::stats2d::StatsSummary2D;

use self::Method::*;

use crate::raw::tstzrange;

use crate::raw::bytea;

mod accessors;
use accessors::{CounterInterpolatedDeltaAccessor,
CounterInterpolatedRateAccessor};

// pg_type! can't handle generics so use a type alias to specify the type for `stats`
type PgTypeHackStatsSummary2D = StatsSummary2D<f64>;

// TODO wrap FlatSummary a la GaugeSummary - requires serialization version bump
pg_type! {
    #[derive(Debug, PartialEq)]
    struct CounterSummary {
        stats: PgTypeHackStatsSummary2D,
        first: TSPoint,
        second: TSPoint,
        penultimate: TSPoint,
        last: TSPoint,
        reset_sum: f64,
        num_resets: u64,
        num_changes: u64,
        #[flat_serialize::flatten]
        bounds: I64RangeWrapper,
    }
}

ron_inout_funcs!(CounterSummary);

impl CounterSummary {
    /// Converts the flat-serialized SQL value into the `counter_agg` crate's
    /// working representation.
    pub fn to_internal_counter_summary(&self) -> MetricSummary {
        MetricSummary {
            first: self.first,
            second: self.second,
            penultimate: self.penultimate,
            last: self.last,
            reset_sum: self.reset_sum,
            num_resets: self.num_resets,
            num_changes: self.num_changes,
            stats: self.stats,
            bounds: self.bounds.to_i64range(),
        }
    }

    /// Flattens an internal `MetricSummary` back into the SQL value.
    pub fn from_internal_counter_summary(st: MetricSummary) -> Self {
        unsafe {
            flatten!(CounterSummary {
                stats: st.stats,
                first: st.first,
                second: st.second,
                penultimate: st.penultimate,
                last: st.last,
                reset_sum: st.reset_sum,
                num_resets: st.num_resets,
                num_changes: st.num_changes,
                bounds: I64RangeWrapper::from_i64range(st.bounds)
            })
        }
    }

    // fn set_bounds(&mut self, bounds: Option<I64Range>){
    //     self.bounds = &I64RangeWrapper::from_i64range(bounds);
    // }

    /// Extends this summary to cover [interval_start, interval_start + interval_len)
    /// by linearly interpolating boundary points from the neighboring summaries.
    /// A neighbor whose value implies a counter reset is re-based to 0 before
    /// interpolating. Either neighbor may be absent.
    fn interpolate(
        &self,
        interval_start: i64,
        interval_len: i64,
        prev: Option<CounterSummary>,
        next: Option<CounterSummary>,
    ) -> CounterSummary {
        // Only interpolate a leading point if our data starts after the interval does.
        let prev = if self.first.ts > interval_start {
            prev.map(|summary| {
                // counter reset between prev and us → treat prev's last as 0
                let first = if summary.last.val > self.first.val {
                    TSPoint {
                        ts: summary.last.ts,
                        val: 0.,
                    }
                } else {
                    summary.last
                };
                time_weighted_average::TimeWeightMethod::Linear
                    .interpolate(first, Some(self.first), interval_start)
                    .expect("unable to interpolate lower bound")
            })
        } else {
            None
        };
        let next = next.map(|summary| {
            // counter reset between us and next → treat our last as 0
            let last = if self.last.val > summary.first.val {
                TSPoint {
                    ts: self.last.ts,
                    val: 0.,
                }
            } else {
                self.last
            };
            time_weighted_average::TimeWeightMethod::Linear
                .interpolate(last, Some(summary.first), interval_start + interval_len)
                .expect("unable to interpolate upper bound")
        });
        let builder = prev.map(|pt| CounterSummaryBuilder::new(&pt, None));
        let mut builder = builder.map_or_else(
            || {
                // No leading point: start the builder from our own data, dropping bounds.
                let mut summary = self.clone();
                summary.bounds = I64RangeWrapper::from_i64range(None);
                summary.to_internal_counter_summary().into()
            },
            |mut builder| {
                builder
                    .combine(&self.to_internal_counter_summary())
                    .expect("unable to add data to interpolation");
                builder
            },
        );
        if let Some(next) = next {
            builder
                .add_point(&next)
                .expect("unable to add final interpolated point");
        }
        CounterSummary::from_internal_counter_summary(builder.build())
    }
}

/// Transition state for the `counter_agg` aggregate: raw points are buffered
/// (and sorted lazily) before being folded into `MetricSummary`s.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CounterSummaryTransState {
    #[serde(skip)]
    point_buffer: Vec<TSPoint>,
    // stores bounds until we combine points, after which, the bounds are stored in each summary
    #[serde(skip)]
    bounds: Option<I64Range>,
    // We have a summary buffer here in order to deal with the fact that when the combine function gets called it
    // must first build up a buffer of InternalMetricSummaries, then sort them, then call the combine function in
    // the correct order.
    summary_buffer: Vec<MetricSummary>,
}

impl CounterSummaryTransState {
    fn new() -> Self {
        Self {
            point_buffer: vec![],
            bounds: None,
            summary_buffer: vec![],
        }
    }

    fn push_point(&mut self, value: TSPoint) {
        self.point_buffer.push(value);
    }

    // fn set_bounds(&mut self, bounds: Option<I64Range>){
    //     self.bounds = bounds;
    // }

    /// Sorts the buffered points by timestamp and folds them into a single
    /// `MetricSummary` appended to `summary_buffer`. No-op when the buffer is empty.
    fn combine_points(&mut self) {
        if self.point_buffer.is_empty() {
            return;
        }
        self.point_buffer.sort_unstable_by_key(|p| p.ts);
        let mut iter = self.point_buffer.iter();
        let mut summary = CounterSummaryBuilder::new(iter.next().unwrap(), self.bounds);
        for p in iter {
            summary
                .add_point(p)
                .unwrap_or_else(|e| pgrx::error!("{}", e));
        }
        self.point_buffer.clear();
        // TODO build method should check validity
        // check bounds only after we've combined all the points, so we aren't doing it all the time.
        if !summary.bounds_valid() {
            panic!("counter bounds invalid")
        }
        self.summary_buffer.push(summary.build());
    }

    /// Appends the other state's summaries to ours (order is fixed later by
    /// `combine_summaries`).
    fn push_summary(&mut self, other: &CounterSummaryTransState) {
        let sum_iter = other.summary_buffer.iter();
        for sum in sum_iter {
            self.summary_buffer.push(sum.clone());
        }
    }

    /// Collapses all buffered summaries (and any pending points) into at most
    /// one summary, combining them in timestamp order.
    fn combine_summaries(&mut self) {
        self.combine_points();

        if self.summary_buffer.len() <= 1 {
            return;
        }

        // TODO move much of this method to crate?
        self.summary_buffer.sort_unstable_by_key(|s| s.first.ts);
        let mut sum_iter = self.summary_buffer.iter();
        let mut new_summary = CounterSummaryBuilder::from(sum_iter.next().unwrap().clone());
        for sum in sum_iter {
            new_summary
                .combine(sum)
                .unwrap_or_else(|e| pgrx::error!("{}", e));
        }
        self.summary_buffer = vec![new_summary.build()];
    }
}

/// serialfunc for the counter aggregates: canonicalizes the state then serializes it.
#[pg_extern(immutable, parallel_safe, strict)]
pub fn counter_summary_trans_serialize(state: Internal) -> bytea {
    let mut state = state;
    let state: &mut CounterSummaryTransState = unsafe { state.get_mut().unwrap() };
    state.combine_summaries();
    crate::do_serialize!(state)
}

/// deserialfunc for the counter aggregates.
#[pg_extern(strict, immutable, parallel_safe)]
pub fn counter_summary_trans_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    counter_summary_trans_deserialize_inner(bytes).internal()
}

pub fn counter_summary_trans_deserialize_inner(bytes: bytea) -> Inner<CounterSummaryTransState> {
    let c: CounterSummaryTransState = crate::do_deserialize!(bytes, CounterSummaryTransState);
    c.into()
}

/// sfunc for `counter_agg(ts, value, bounds)`.
#[pg_extern(immutable, parallel_safe)]
pub fn counter_agg_trans(
    state: Internal,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    bounds: Option<tstzrange>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    counter_agg_trans_inner(unsafe { state.to_inner() }, ts, val, bounds, fcinfo).internal()
}

/// Inner transition: ignores rows where either `ts` or `val` is NULL; the
/// bounds (if any) are captured when the state is first created.
pub fn counter_agg_trans_inner(
    state: Option<Inner<CounterSummaryTransState>>,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    bounds: Option<tstzrange>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<CounterSummaryTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let p = match (ts, val) {
                (_, None) => return state,
                (None, _) => return state,
                (Some(ts), Some(val)) => TSPoint { ts: ts.into(), val },
            };
            match state {
                None => {
                    let mut s = CounterSummaryTransState::new();
                    if let Some(r) = bounds {
                        s.bounds = get_range(r.0.cast_mut_ptr());
                    }
                    s.push_point(p);
                    Some(s.into())
                }
                Some(mut s) => {
                    s.push_point(p);
                    Some(s)
                }
            }
        })
    }
}

/// sfunc for the two-argument `counter_agg(ts, value)` overload.
#[pg_extern(immutable, parallel_safe)]
pub fn counter_agg_trans_no_bounds(
    state: Internal,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    counter_agg_trans_inner(unsafe { state.to_inner() }, ts, val, None, fcinfo).internal()
}

/// sfunc for `rollup(CounterSummary)`.
#[pg_extern(immutable, parallel_safe)]
pub fn counter_agg_summary_trans(
    state: Internal,
    value: Option<CounterSummary>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    counter_agg_summary_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

pub fn counter_agg_summary_trans_inner(
    state: Option<Inner<CounterSummaryTransState>>,
    value: Option<CounterSummary>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<CounterSummaryTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, value) {
            (state, None) => state,
            (None, Some(value)) => {
                let mut state = CounterSummaryTransState::new();
                state
                    .summary_buffer
                    .push(value.to_internal_counter_summary());
                Some(state.into())
            }
            (Some(mut state), Some(value)) => {
                state
                    .summary_buffer
                    .push(value.to_internal_counter_summary());
                Some(state)
            }
        })
    }
}

/// combinefunc for the counter aggregates (parallel workers).
#[pg_extern(immutable, parallel_safe)]
pub fn counter_agg_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { counter_agg_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

pub fn counter_agg_combine_inner(
    state1: Option<Inner<CounterSummaryTransState>>,
    state2: Option<Inner<CounterSummaryTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<CounterSummaryTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            match (state1, state2) {
                (None, None) => None,
                (None, Some(state2)) => {
                    let mut s = state2.clone();
                    s.combine_points();
                    Some(s.into())
                }
                (Some(state1), None) => {
                    let mut s = state1.clone();
                    s.combine_points();
                    Some(s.into())
                }
                //should I make these return themselves?
                (Some(state1), Some(state2)) => {
                    let mut s1 = state1.clone(); // is there a way to avoid if it doesn't need it?
                    s1.combine_points();
                    let mut s2 = state2.clone();
                    s2.combine_points();
                    s2.push_summary(&s1);
                    Some(s2.into())
                }
            }
        })
    }
}

/// finalfunc for the counter aggregates: collapses the state into a single
/// `CounterSummary`, or NULL when no non-NULL input was seen.
#[pg_extern(immutable, parallel_safe)]
fn counter_agg_final(state: Internal, fcinfo: pg_sys::FunctionCallInfo) -> Option<CounterSummary> {
    counter_agg_final_inner(unsafe { state.to_inner() }, fcinfo)
}

fn counter_agg_final_inner(
    state: Option<Inner<CounterSummaryTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<CounterSummary> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let mut state = match state {
                None => return None,
                Some(state) => state.clone(),
            };
            state.combine_summaries();
            debug_assert!(state.summary_buffer.len() <= 1);
            match state.summary_buffer.pop() {
                None => None,
                Some(st) => {
                    // there are some edge cases that this should prevent, but I'm not sure it's necessary, we do check the bounds in the functions that use them.
                    if !st.bounds_valid() {
                        panic!("counter bounds invalid")
                    }
                    Some(CounterSummary::from_internal_counter_summary(st))
                }
            }
        })
    }
}

extension_sql!(
    "\n\
    CREATE AGGREGATE counter_agg( ts timestamptz, value DOUBLE PRECISION, bounds tstzrange )\n\
    (\n\
        sfunc = counter_agg_trans,\n\
        stype = internal,\n\
        finalfunc = counter_agg_final,\n\
        combinefunc = counter_agg_combine,\n\
        serialfunc = counter_summary_trans_serialize,\n\
        deserialfunc = counter_summary_trans_deserialize,\n\
        parallel = restricted\n\
    );\n",
    name = "counter_agg",
    requires = [
        counter_agg_trans,
        counter_agg_final,
        counter_agg_combine,
        counter_summary_trans_serialize,
        counter_summary_trans_deserialize
    ],
);

// allow calling counter agg without bounds provided.
extension_sql!( "\n\ CREATE AGGREGATE counter_agg( ts timestamptz, value DOUBLE PRECISION )\n\ (\n\ sfunc = counter_agg_trans_no_bounds,\n\ stype = internal,\n\ finalfunc = counter_agg_final,\n\ combinefunc = counter_agg_combine,\n\ serialfunc = counter_summary_trans_serialize,\n\ deserialfunc = counter_summary_trans_deserialize,\n\ parallel = restricted\n\ );\n\ ", name = "counter_agg2", requires = [ counter_agg_trans_no_bounds, counter_agg_final, counter_agg_combine, counter_summary_trans_serialize, counter_summary_trans_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(cs CounterSummary)\n\ (\n\ sfunc = counter_agg_summary_trans,\n\ stype = internal,\n\ finalfunc = counter_agg_final,\n\ combinefunc = counter_agg_combine,\n\ serialfunc = counter_summary_trans_serialize,\n\ deserialfunc = counter_summary_trans_deserialize,\n\ parallel = restricted\n\ );\n\ ", name = "counter_rollup", requires = [ counter_agg_summary_trans, counter_agg_final, counter_agg_combine, counter_summary_trans_serialize, counter_summary_trans_deserialize ], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_delta(sketch: CounterSummary, _accessor: AccessorDelta) -> f64 { counter_agg_delta(sketch) } #[pg_extern(name = "delta", strict, immutable, parallel_safe)] fn counter_agg_delta(summary: CounterSummary) -> f64 { summary.to_internal_counter_summary().delta() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_rate(sketch: CounterSummary, _accessor: AccessorRate) -> Option { counter_agg_rate(sketch) } #[pg_extern(name = "rate", strict, immutable, parallel_safe)] fn counter_agg_rate(summary: CounterSummary) -> Option { summary.to_internal_counter_summary().rate() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_time_delta(sketch: CounterSummary, _accessor: AccessorTimeDelta) -> f64 { counter_agg_time_delta(sketch) } #[pg_extern(name = "time_delta", strict, immutable, parallel_safe)] fn 
counter_agg_time_delta(summary: CounterSummary) -> f64 { summary.to_internal_counter_summary().time_delta() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_irate_left( sketch: CounterSummary, _accessor: AccessorIrateLeft, ) -> Option { counter_agg_irate_left(sketch) } #[pg_extern(name = "irate_left", strict, immutable, parallel_safe)] fn counter_agg_irate_left(summary: CounterSummary) -> Option { summary.to_internal_counter_summary().irate_left() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_irate_right( sketch: CounterSummary, _accessor: AccessorIrateRight, ) -> Option { counter_agg_irate_right(sketch) } #[pg_extern(name = "irate_right", strict, immutable, parallel_safe)] fn counter_agg_irate_right(summary: CounterSummary) -> Option { summary.to_internal_counter_summary().irate_right() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_idelta_left(sketch: CounterSummary, _accessor: AccessorIdeltaLeft) -> f64 { counter_agg_idelta_left(sketch) } #[pg_extern(name = "idelta_left", strict, immutable, parallel_safe)] fn counter_agg_idelta_left(summary: CounterSummary) -> f64 { summary.to_internal_counter_summary().idelta_left() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_idelta_right( sketch: CounterSummary, _accessor: AccessorIdeltaRight, ) -> f64 { counter_agg_idelta_right(sketch) } #[pg_extern(name = "idelta_right", strict, immutable, parallel_safe)] fn counter_agg_idelta_right(summary: CounterSummary) -> f64 { summary.to_internal_counter_summary().idelta_right() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_with_bounds( sketch: CounterSummary, accessor: AccessorWithBounds, ) -> CounterSummary { let mut builder = CounterSummaryBuilder::from(sketch.to_internal_counter_summary()); builder.set_bounds(accessor.bounds()); CounterSummary::from_internal_counter_summary(builder.build()) } 
#[pg_extern(name = "with_bounds", strict, immutable, parallel_safe)] fn counter_agg_with_bounds(summary: CounterSummary, bounds: tstzrange) -> CounterSummary { // TODO dedup with previous by using apply_bounds unsafe { let ptr = bounds.0.cast_mut_ptr(); let mut builder = CounterSummaryBuilder::from(summary.to_internal_counter_summary()); builder.set_bounds(get_range(ptr)); CounterSummary::from_internal_counter_summary(builder.build()) } } // TODO MetricSummary::with_bounds ? // fn with_bounds(mut self, bounds: Option) -> Self { // self.bounds = bounds; // self // } // fn apply_bounds(summary: MetricSummary, bounds: Option) -> MetricSummary { // let mut builder = CounterSummaryBuilder::from(summary.to_internal_counter_summary()); // builder.set_bounds(bounds); // CounterSummary::from_internal_counter_summary(builder.build()) // } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_extrapolated_delta( sketch: CounterSummary, accessor: AccessorExtrapolatedDelta, ) -> Option { counter_agg_extrapolated_delta(sketch, accessor.method.as_str()) } #[pg_extern(name = "extrapolated_delta", strict, immutable, parallel_safe)] fn counter_agg_extrapolated_delta(summary: CounterSummary, method: &str) -> Option { match method_kind(method) { Prometheus => summary .to_internal_counter_summary() .prometheus_delta() .unwrap(), } } #[pg_extern(name = "interpolated_delta", immutable, parallel_safe)] fn counter_agg_interpolated_delta( summary: CounterSummary, start: crate::raw::TimestampTz, duration: crate::raw::Interval, prev: Option, next: Option, ) -> f64 { let interval = crate::datum_utils::interval_to_ms(&start, &duration); summary .interpolate(start.into(), interval, prev, next) .to_internal_counter_summary() .delta() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_interpolated_delta( sketch: CounterSummary, accessor: CounterInterpolatedDeltaAccessor, ) -> f64 { let prev = if accessor.flags & 1 == 1 { 
Some(accessor.prev.clone().into()) } else { None }; let next = if accessor.flags & 2 == 2 { Some(accessor.next.clone().into()) } else { None }; counter_agg_interpolated_delta( sketch, accessor.timestamp.into(), accessor.interval.into(), prev, next, ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_extrapolated_rate( sketch: CounterSummary, accessor: AccessorExtrapolatedRate, ) -> Option { counter_agg_extrapolated_rate(sketch, accessor.method.as_str()) } #[pg_extern(name = "extrapolated_rate", strict, immutable, parallel_safe)] fn counter_agg_extrapolated_rate(summary: CounterSummary, method: &str) -> Option { match method_kind(method) { Prometheus => summary .to_internal_counter_summary() .prometheus_rate() .unwrap(), } } #[pg_extern(name = "interpolated_rate", immutable, parallel_safe)] fn counter_agg_interpolated_rate( summary: CounterSummary, start: crate::raw::TimestampTz, duration: crate::raw::Interval, prev: Option, next: Option, ) -> Option { let interval = crate::datum_utils::interval_to_ms(&start, &duration); summary .interpolate(start.into(), interval, prev, next) .to_internal_counter_summary() .rate() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_interpolated_rate( sketch: CounterSummary, accessor: CounterInterpolatedRateAccessor, ) -> Option { let prev = if accessor.flags & 1 == 1 { Some(accessor.prev.clone().into()) } else { None }; let next = if accessor.flags & 2 == 2 { Some(accessor.next.clone().into()) } else { None }; counter_agg_interpolated_rate( sketch, accessor.timestamp.into(), accessor.interval.into(), prev, next, ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_num_elements( sketch: CounterSummary, _accessor: AccessorNumElements, ) -> i64 { counter_agg_num_elements(sketch) } #[pg_extern(name = "num_elements", strict, immutable, parallel_safe)] fn counter_agg_num_elements(summary: CounterSummary) -> i64 { 
summary.to_internal_counter_summary().stats.n as i64 } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_num_changes(sketch: CounterSummary, _accessor: AccessorNumChanges) -> i64 { counter_agg_num_changes(sketch) } #[pg_extern(name = "num_changes", strict, immutable, parallel_safe)] fn counter_agg_num_changes(summary: CounterSummary) -> i64 { summary.to_internal_counter_summary().num_changes as i64 } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_num_resets(sketch: CounterSummary, _accessor: AccessorNumResets) -> i64 { counter_agg_num_resets(sketch) } #[pg_extern(name = "num_resets", strict, immutable, parallel_safe)] fn counter_agg_num_resets(summary: CounterSummary) -> i64 { summary.to_internal_counter_summary().num_resets as i64 } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_slope(sketch: CounterSummary, _accessor: AccessorSlope) -> Option { counter_agg_slope(sketch) } #[pg_extern(name = "slope", strict, immutable, parallel_safe)] fn counter_agg_slope(summary: CounterSummary) -> Option { summary.to_internal_counter_summary().stats.slope() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_intercept( sketch: CounterSummary, _accessor: AccessorIntercept, ) -> Option { counter_agg_intercept(sketch) } #[pg_extern(name = "intercept", strict, immutable, parallel_safe)] fn counter_agg_intercept(summary: CounterSummary) -> Option { summary.to_internal_counter_summary().stats.intercept() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_corr(sketch: CounterSummary, _accessor: AccessorCorr) -> Option { counter_agg_corr(sketch) } #[pg_extern(name = "corr", strict, immutable, parallel_safe)] fn counter_agg_corr(summary: CounterSummary) -> Option { summary.to_internal_counter_summary().stats.corr() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_zero_time( sketch: CounterSummary, 
_accessor: AccessorCounterZeroTime, ) -> Option { counter_agg_counter_zero_time(sketch) } #[pg_extern(name = "counter_zero_time", strict, immutable, parallel_safe)] fn counter_agg_counter_zero_time(summary: CounterSummary) -> Option { Some(((summary.to_internal_counter_summary().stats.x_intercept()? * 1_000_000.0) as i64).into()) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_first_val(sketch: CounterSummary, _accessor: AccessorFirstVal) -> f64 { counter_agg_first_val(sketch) } #[pg_extern(name = "first_val", strict, immutable, parallel_safe)] fn counter_agg_first_val(summary: CounterSummary) -> f64 { summary.to_internal_counter_summary().first.val } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_last_val(sketch: CounterSummary, _accessor: AccessorLastVal) -> f64 { counter_agg_last_val(sketch) } #[pg_extern(name = "last_val", strict, immutable, parallel_safe)] fn counter_agg_last_val(summary: CounterSummary) -> f64 { summary.to_internal_counter_summary().last.val } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_first_time( sketch: CounterSummary, _accessor: AccessorFirstTime, ) -> crate::raw::TimestampTz { counter_agg_first_time(sketch) } #[pg_extern(name = "first_time", strict, immutable, parallel_safe)] fn counter_agg_first_time(summary: CounterSummary) -> crate::raw::TimestampTz { summary.to_internal_counter_summary().first.ts.into() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_counter_agg_last_time( sketch: CounterSummary, _accessor: AccessorLastTime, ) -> crate::raw::TimestampTz { counter_agg_last_time(sketch) } #[pg_extern(name = "last_time", strict, immutable, parallel_safe)] fn counter_agg_last_time(summary: CounterSummary) -> crate::raw::TimestampTz { summary.to_internal_counter_summary().last.ts.into() } #[derive( Clone, Copy, Debug, serde::Serialize, serde::Deserialize, flat_serialize_macro::FlatSerializable, )] #[repr(u8)] pub 
// Extrapolation method used by `extrapolated_delta` / `extrapolated_rate`.
// Currently only the Prometheus-style extrapolation is supported.
enum Method {
    Prometheus = 1,
}

impl Method {
    // Canonical lowercase name of the method, matching the SQL-facing string.
    pub fn as_str(&self) -> &'static str {
        match self {
            Method::Prometheus => "prometheus",
        }
    }
}

// Parse a method name or abort the transaction with a PostgreSQL error.
// `#[track_caller]` makes the error point at the SQL-facing call site.
#[track_caller]
pub fn method_kind(method: &str) -> Method {
    match as_method(method) {
        Some(method) => method,
        None => pgrx::error!("unknown analysis method. Valid methods are 'prometheus'"),
    }
}

// Case-insensitive, whitespace-tolerant parse of a method name.
// NOTE(review): the return type reads bare `Option` here; presumably
// `Option<Method>` — the angle-bracket contents appear to have been lost
// during extraction. TODO confirm against the original file.
pub fn as_method(method: &str) -> Option {
    match method.trim().to_lowercase().as_str() {
        "prometheus" => Some(Method::Prometheus),
        _ => None,
    }
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use super::testing::*;
    use super::*;
    use approx::assert_relative_eq;

    // Run `$stmt` over SPI and return its single (non-NULL) value as `$type`.
    macro_rules! select_one {
        ($client:expr, $stmt:expr, $type:ty) => {
            $client
                .update($stmt, None, &[])
                .unwrap()
                .first()
                .get_one::<$type>()
                .unwrap()
                .unwrap()
        };
    }

    // Run `$stmt` returning two columns of the same type, assert they are
    // equal (used to check function-call vs arrow-operator parity), and
    // return the shared value.
    macro_rules! select_and_check_one {
        ($client:expr, $stmt:expr, $type:ty) => {{
            let (a, b) = $client
                .update($stmt, None, &[])
                .unwrap()
                .first()
                .get_two::<$type, $type>()
                .unwrap();
            assert_eq!(a, b);
            a.unwrap()
        }};
    }

    //do proper numerical comparisons on the values where that matters, use exact where it should be exact.
    // copied from counter_agg crate
    // Compare two summaries field-by-field: exact equality for the discrete
    // fields, relative (floating-point) equality for the running statistics.
    #[track_caller]
    fn assert_close_enough(p1: &MetricSummary, p2: &MetricSummary) {
        assert_eq!(p1.first, p2.first, "first");
        assert_eq!(p1.second, p2.second, "second");
        assert_eq!(p1.penultimate, p2.penultimate, "penultimate");
        assert_eq!(p1.last, p2.last, "last");
        assert_eq!(p1.num_changes, p2.num_changes, "num_changes");
        assert_eq!(p1.num_resets, p2.num_resets, "num_resets");
        assert_eq!(p1.stats.n, p2.stats.n, "n");
        assert_relative_eq!(p1.stats.sx, p2.stats.sx);
        assert_relative_eq!(p1.stats.sx2, p2.stats.sx2);
        assert_relative_eq!(p1.stats.sy, p2.stats.sy);
        assert_relative_eq!(p1.stats.sy2, p2.stats.sy2);
        assert_relative_eq!(p1.stats.sxy, p2.stats.sxy);
    }

    // End-to-end SPI test of the counter_agg aggregate and its accessors;
    // each accessor is checked in both function-call and arrow-operator form.
    #[pg_test]
    fn test_counter_aggregate() {
        Spi::connect_mut(|client| {
            // set search_path after defining our table so we don't pollute the wrong schema
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            let stmt = "SELECT format('toolkit_experimental, %s',current_setting('search_path'))";
            let search_path = select_one!(client, stmt, String);
            client
                .update(
                    &format!("SET LOCAL search_path TO {search_path}"),
                    None,
                    &[],
                )
                .unwrap();
            make_test_table(client, "test");

            // NULL bounds are equivalent to none provided
            let stmt = "SELECT counter_agg(ts, val) FROM test";
            let a = select_one!(client, stmt, CounterSummary);
            let stmt = "SELECT counter_agg(ts, val, NULL::tstzrange) FROM test";
            let b = select_one!(client, stmt, CounterSummary);
            assert_close_enough(
                &a.to_internal_counter_summary(),
                &b.to_internal_counter_summary(),
            );

            let stmt = "SELECT \
            delta(counter_agg(ts, val)), \
            counter_agg(ts, val)->delta() \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 10.0);

            let stmt = "SELECT \
            time_delta(counter_agg(ts, val)), \
            counter_agg(ts, val)->time_delta() \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 60.0);

            // have to add 1 ms to right bounds to get full range and simple values because prometheus subtracts a ms
            let stmt = "SELECT \
            extrapolated_delta(counter_agg(ts, val, '[2020-01-01 00:00:00+00, 2020-01-01 00:02:00.001+00)'), 'prometheus'), \
            counter_agg(ts, val, '[2020-01-01 00:00:00+00, 2020-01-01 00:02:00.001+00)') -> extrapolated_delta('prometheus') \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 20.0);

            // doesn't matter if we set the bounds before or after
            let stmt = "SELECT \
            extrapolated_delta(with_bounds(counter_agg(ts, val), '[2020-01-01 00:00:00+00, 2020-01-01 00:02:00.001+00)'), 'prometheus'), \
            counter_agg(ts, val)->with_bounds('[2020-01-01 00:00:00+00, 2020-01-01 00:02:00.001+00)')-> extrapolated_delta('prometheus') \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 20.0);

            let stmt = "SELECT \
            extrapolated_rate(counter_agg(ts, val, '[2020-01-01 00:00:00+00, 2020-01-01 00:02:00.001+00)'), 'prometheus'), \
            counter_agg(ts, val, '[2020-01-01 00:00:00+00, 2020-01-01 00:02:00.001+00)')->extrapolated_rate('prometheus') \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 20.0 / 120.0);

            let stmt = "INSERT INTO test VALUES('2020-01-01 00:02:00+00', 10.0), ('2020-01-01 00:03:00+00', 20.0), ('2020-01-01 00:04:00+00', 10.0)";
            client.update(stmt, None, &[]).unwrap();

            let stmt = "SELECT \
            slope(counter_agg(ts, val)), \
            counter_agg(ts, val)->slope() \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 10.0 / 60.0);

            let stmt = "SELECT \
            intercept(counter_agg(ts, val)), \
            counter_agg(ts, val)->intercept() \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), -105191990.0);

            let stmt = "SELECT \
            corr(counter_agg(ts, val)), \
            counter_agg(ts, val)->corr() \
            FROM test";
            assert_relative_eq!(select_and_check_one!(client, stmt, f64), 1.0);

            let stmt = "SELECT \
            counter_zero_time(counter_agg(ts, val))::TEXT, \
            (counter_agg(ts, val)->counter_zero_time())::TEXT \
            FROM test";
            let zp = select_and_check_one!(client, stmt, String);
            assert_eq!(&zp, "2019-12-31 23:59:00+00");

            let stmt = "INSERT INTO test VALUES('2020-01-01 00:08:00+00', 30.0), ('2020-01-01 00:10:00+00', 30.0), ('2020-01-01 00:10:30+00', 10.0), ('2020-01-01 00:20:00+00', 40.0)";
            client.update(stmt, None, &[]).unwrap();

            let stmt = "SELECT \
            num_elements(counter_agg(ts, val)), \
            counter_agg(ts, val)->num_elements() \
            FROM test";
            assert_eq!(select_and_check_one!(client, stmt, i64), 9);

            let stmt = "SELECT \
            num_resets(counter_agg(ts, val)), \
            counter_agg(ts, val)->num_resets() \
            FROM test";
            assert_eq!(select_and_check_one!(client, stmt, i64), 3);

            let stmt = "SELECT \
            num_changes(counter_agg(ts, val)), \
            counter_agg(ts, val)->num_changes() \
            FROM test";
            assert_eq!(select_and_check_one!(client, stmt, i64), 7);

            //combine function works as expected
            let stmt = "SELECT counter_agg(ts, val) FROM test";
            let a = select_one!(client, stmt, CounterSummary);
            let stmt = "WITH t as (SELECT date_trunc('minute', ts), counter_agg(ts, val) as agg FROM test group by 1 ) SELECT rollup(agg) FROM t";
            let b = select_one!(client, stmt, CounterSummary);
            assert_close_enough(
                &a.to_internal_counter_summary(),
                &b.to_internal_counter_summary(),
            );
        });
    }

    // Round-trip the aggregate through its textual (RON) representation and
    // check accessors agree between a freshly built and a parsed summary.
    #[pg_test]
    fn test_counter_io() {
        Spi::connect_mut(|client| {
            client
                .update(
                    "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
            let stmt = "INSERT INTO test VALUES\
            ('2020-01-01 00:00:00+00', 10.0),\
            ('2020-01-01 00:01:00+00', 20.0),\
            ('2020-01-01 00:02:00+00', 30.0),\
            ('2020-01-01 00:03:00+00', 20.0),\
            ('2020-01-01 00:04:00+00', 10.0),\
            ('2020-01-01 00:05:00+00', 20.0),\
            ('2020-01-01 00:06:00+00', 10.0),\
            ('2020-01-01 00:07:00+00', 30.0),\
            ('2020-01-01 00:08:00+00', 10.0)";
            client.update(stmt, None, &[]).unwrap();

            // Golden textual form of the aggregate over the rows above.
            let expected = "(\
            version:1,\
            stats:(\
            n:9,\
            sx:5680370160,\
            sx2:216000,\
            sx3:0,\
            sx4:9175680000,\
            sy:530,\
            sy2:9688.888888888889,\
            sy3:13308.641975308623,\
            sy4:18597366.255144034,\
            sxy:45600\
            ),\
            first:(ts:\"2020-01-01 00:00:00+00\",val:10),\
            second:(ts:\"2020-01-01 00:01:00+00\",val:20),\
            penultimate:(ts:\"2020-01-01 00:07:00+00\",val:30),\
            last:(ts:\"2020-01-01 00:08:00+00\",val:10),\
            reset_sum:100,\
            num_resets:4,\
            num_changes:8,\
            bounds:(\
            is_present:0,\
            has_left:0,\
            has_right:0,\
            padding:(0,0,0,0,0),\
            left:None,\
            right:None\
            )\
            )";
            let stmt = "SELECT counter_agg(ts, val)::TEXT FROM test";
            let test = select_one!(client, stmt, String);
            assert_eq!(test, expected);

            let stmt = format!("SELECT '{expected}'::CounterSummary::TEXT");
            let round_trip = select_one!(client, &stmt, String);
            assert_eq!(expected, round_trip);

            let stmt = "SELECT delta(counter_agg(ts, val)) FROM test";
            let delta = select_one!(client, stmt, f64);
            assert!((delta - 100.).abs() < f64::EPSILON);
            let stmt = format!("SELECT delta('{expected}')");
            let delta_test = select_one!(client, &stmt, f64);
            assert!((delta - delta_test).abs() < f64::EPSILON);

            let stmt = "SELECT num_resets(counter_agg(ts, val)) FROM test";
            let resets = select_one!(client, stmt, i64);
            assert_eq!(resets, 4);
            let stmt = format!("SELECT num_resets('{expected}')");
            let resets_test = select_one!(client, &stmt, i64);
            assert_eq!(resets, resets_test);
        });
    }

    // Byte-level serialization test: build a transition state by hand and
    // compare its serialized form against a golden byte buffer.
    #[pg_test]
    fn test_counter_byte_io() {
        unsafe {
            use std::ptr;
            // BASE is 2020-01-01 00:00:00+00 in PostgreSQL microseconds;
            // MIN is one minute in microseconds.
            const BASE: i64 = 631152000000000;
            const MIN: i64 = 60000000;
            let state =
                counter_agg_trans_inner(None, Some(BASE.into()), Some(10.0), None, ptr::null_mut());
            let state = counter_agg_trans_inner(
                state,
                Some((BASE + MIN).into()),
                Some(20.0),
                None,
                ptr::null_mut(),
            );
            let state = counter_agg_trans_inner(
                state,
                Some((BASE + 2 * MIN).into()),
                Some(30.0),
                None,
                ptr::null_mut(),
            );
            let state = counter_agg_trans_inner(
                state,
                Some((BASE + 3 * MIN).into()),
                Some(10.0),
                None,
                ptr::null_mut(),
            );
            let state = counter_agg_trans_inner(
                state,
                Some((BASE + 4 * MIN).into()),
                Some(20.0),
                None,
                ptr::null_mut(),
            );
            let state = counter_agg_trans_inner(
                state,
                Some((BASE + 5 * MIN).into()),
                Some(30.0),
                None,
                ptr::null_mut(),
            );
            let mut control = state.unwrap();
            let buffer =
                counter_summary_trans_serialize(Inner::from(control.clone()).internal().unwrap());
            let buffer = pgrx::varlena::varlena_to_byte_slice(buffer.0.cast_mut_ptr());

            // Golden serialized bytes for the six-point state built above.
            let expected = [
                1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 96, 194, 134, 7, 62, 2, 0, 0, 0, 0, 0, 0, 0, 36,
                64, 0, 231, 85, 138, 7, 62, 2, 0, 0, 0, 0, 0, 0, 0, 52, 64, 0, 124, 16, 149, 7, 62,
                2, 0, 0, 0, 0, 0, 0, 0, 52, 64, 0, 3, 164, 152, 7, 62, 2, 0, 0, 0, 0, 0, 0, 0, 62,
                64, 0, 0, 0, 0, 0, 0, 62, 64, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 128, 144, 246, 54, 236, 65, 0, 0, 0, 0, 0, 195, 238, 64, 0,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 32, 17, 209, 65, 0, 0, 0, 0, 0, 64, 106, 64, 0,
                0, 0, 0, 0, 88, 155, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76, 248, 42, 65, 0, 0,
                0, 0, 0, 130, 196, 64, 0,
            ];
            assert_eq!(buffer, expected);

            // Deserializing the golden bytes must reproduce the combined state.
            let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected);
            let new_state = counter_summary_trans_deserialize_inner(bytea(pg_sys::Datum::from(
                expected.as_ptr(),
            )));
            control.combine_summaries(); // Serialized form is always combined
            assert_eq!(&*new_state, &*control);
        }
    }

    #[pg_test]
    fn delta_after_counter_decrease() {
        Spi::connect_mut(|client| {
            decrease(client);
            let stmt = "SELECT delta(counter_agg(ts, val)) FROM test";
            // 10 after 30 means there was a reset so we add 30 + 10 = 40.
            // Delta from 30 to 40 => 10
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn delta_after_counter_increase() {
        Spi::connect_mut(|client| {
            increase(client);
            let stmt = "SELECT delta(counter_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn delta_after_counter_decrease_then_increase_to_same_value() {
        Spi::connect_mut(|client| {
            decrease_then_increase_to_same_value(client);
            let stmt = "SELECT delta(counter_agg(ts, val)) FROM test";
            // 10 after 30 means there was a reset so we add 30 + 10 + 30 = 70.
            // Delta from 30 to 70 => 30
            assert_eq!(30.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn delta_after_counter_increase_then_decrease_to_same_value() {
        Spi::connect_mut(|client| {
            increase_then_decrease_to_same_value(client);
            let stmt = "SELECT delta(counter_agg(ts, val)) FROM test";
            // In this case, counter goes 10, 30, 40 (reset + 10).
            // Delta from 10 to 40 => 30
            assert_eq!(30.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_left_after_counter_decrease() {
        Spi::connect_mut(|client| {
            decrease(client);
            let stmt = "SELECT idelta_left(counter_agg(ts, val)) FROM test";
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_left_after_counter_increase() {
        Spi::connect_mut(|client| {
            increase(client);
            let stmt = "SELECT idelta_left(counter_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_left_after_counter_increase_then_decrease_to_same_value() {
        Spi::connect_mut(|client| {
            increase_then_decrease_to_same_value(client);
            let stmt = "SELECT idelta_left(counter_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_left_after_counter_decrease_then_increase_to_same_value() {
        Spi::connect_mut(|client| {
            decrease_then_increase_to_same_value(client);
            let stmt = "SELECT idelta_left(counter_agg(ts, val)) FROM test";
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_counter_decrease() {
        Spi::connect_mut(|client| {
            decrease(client);
            let stmt = "SELECT idelta_right(counter_agg(ts, val)) FROM test";
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_counter_increase() {
        Spi::connect_mut(|client| {
            increase(client);
            let stmt = "SELECT idelta_right(counter_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_counter_increase_then_decrease_to_same_value() {
        Spi::connect_mut(|client| {
            increase_then_decrease_to_same_value(client);
            let stmt = "SELECT idelta_right(counter_agg(ts, val)) FROM test";
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_counter_decrease_then_increase_to_same_value() {
        Spi::connect_mut(|client| {
            decrease_then_increase_to_same_value(client);
            let stmt = "SELECT idelta_right(counter_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    // Exercise interpolated_delta / interpolated_rate over three day-buckets,
    // using LAG/LEAD to provide the previous and next buckets for
    // interpolation at bucket boundaries.
    #[pg_test]
    fn counter_agg_interpolation() {
        Spi::connect_mut(|client| {
            client.update(
                "CREATE TABLE test(time timestamptz, value double precision, bucket timestamptz)",
                None,
                &[]
            ).unwrap();
            client
                .update(
                    r#"INSERT INTO test VALUES ('2020-1-1 10:00'::timestamptz, 10.0, '2020-1-1'::timestamptz), ('2020-1-1 12:00'::timestamptz, 40.0, '2020-1-1'::timestamptz), ('2020-1-1 16:00'::timestamptz, 20.0, '2020-1-1'::timestamptz), ('2020-1-2 4:00'::timestamptz, 15.0, '2020-1-2'::timestamptz), ('2020-1-2 12:00'::timestamptz, 50.0, '2020-1-2'::timestamptz), ('2020-1-2 20:00'::timestamptz, 25.0, '2020-1-2'::timestamptz), ('2020-1-3 4:00'::timestamptz, 30.0, '2020-1-3'::timestamptz), ('2020-1-3 12:00'::timestamptz, 0.0, '2020-1-3'::timestamptz), ('2020-1-3 16:00'::timestamptz, 35.0, '2020-1-3'::timestamptz)"#,
                    None,
                    &[],
                )
                .unwrap();

            let mut deltas = client
                .update(
                    r#"SELECT interpolated_delta( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, counter_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#,
                    None,
                    &[],
                )
                .unwrap();
            // Day 1, start at 10, interpolated end of day is 10 (after reset), reset at 40 and 20
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(10. + 40. + 20. - 10.)
            );
            // Day 2, interpolated start is 10, interpolated end is 27.5, reset at 50
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(27.5 + 50. - 10.)
            );
            // Day 3, interpolated start is 27.5, end is 35, reset at 30
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(35. + 30. - 27.5)
            );
            assert!(deltas.next().is_none());

            // test that the arrow version also returns the same result
            let mut deltas = client
                .update(
                    r#"SELECT agg -> interpolated_delta( bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, counter_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#,
                    None,
                    &[],
                )
                .unwrap();
            // Day 1, start at 10, interpolated end of day is 10 (after reset), reset at 40 and 20
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(10. + 40. + 20. - 10.)
            );
            // Day 2, interpolated start is 10, interpolated end is 27.5, reset at 50
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(27.5 + 50. - 10.)
            );
            // Day 3, interpolated start is 27.5, end is 35, reset at 30
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(35. + 30. - 27.5)
            );
            assert!(deltas.next().is_none());

            let mut rates = client
                .update(
                    r#"SELECT interpolated_rate( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, counter_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#,
                    None,
                    &[],
                )
                .unwrap();
            // Day 1, 14 hours (rate is per second)
            assert_eq!(
                rates.next().unwrap()[1].value().unwrap(),
                Some((10. + 40. + 20. - 10.) / (14. * 60. * 60.))
            );
            // Day 2, 24 hours
            assert_eq!(
                rates.next().unwrap()[1].value().unwrap(),
                Some((27.5 + 50. - 10.) / (24. * 60. * 60.))
            );
            // Day 3, 16 hours
            assert_eq!(
                rates.next().unwrap()[1].value().unwrap(),
                Some((35. + 30. - 27.5) / (16. * 60. * 60.))
            );
            assert!(rates.next().is_none());

            // test that the arrow operator version also returns the same result
            let mut rates = client
                .update(
                    r#"SELECT agg -> interpolated_rate( bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, counter_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#,
                    None,
                    &[],
                )
                .unwrap();
            // Day 1, 14 hours (rate is per second)
            assert_eq!(
                rates.next().unwrap()[1].value().unwrap(),
                Some((10. + 40. + 20. - 10.) / (14. * 60. * 60.))
            );
            // Day 2, 24 hours
            assert_eq!(
                rates.next().unwrap()[1].value().unwrap(),
                Some((27.5 + 50. - 10.) / (24. * 60. * 60.))
            );
            // Day 3, 16 hours
            assert_eq!(
                rates.next().unwrap()[1].value().unwrap(),
                Some((35. + 30. - 27.5) / (16. * 60. * 60.))
            );
            assert!(rates.next().is_none());
        });
    }

    // When a sample lands exactly on a bucket boundary no interpolation is
    // needed on that side; check the delta math still works out.
    #[pg_test]
    fn interpolated_delta_with_aligned_point() {
        Spi::connect_mut(|client| {
            client.update(
                "CREATE TABLE test(time timestamptz, value double precision, bucket timestamptz)",
                None,
                &[]
            ).unwrap();
            client
                .update(
                    r#"INSERT INTO test VALUES ('2020-1-1 10:00'::timestamptz, 10.0, '2020-1-1'::timestamptz), ('2020-1-1 12:00'::timestamptz, 40.0, '2020-1-1'::timestamptz), ('2020-1-1 16:00'::timestamptz, 20.0, '2020-1-1'::timestamptz), ('2020-1-2 0:00'::timestamptz, 15.0, '2020-1-2'::timestamptz), ('2020-1-2 12:00'::timestamptz, 50.0, '2020-1-2'::timestamptz), ('2020-1-2 20:00'::timestamptz, 25.0, '2020-1-2'::timestamptz)"#,
                    None,
                    &[],
                )
                .unwrap();

            let mut deltas = client
                .update(
                    r#"SELECT interpolated_delta( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, counter_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#,
                    None,
                    &[],
                )
                .unwrap();
            // Day 1, start at 10, interpolated end of day is 15 (after reset), reset at 40 and 20
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(15. + 40. + 20. - 10.)
            );
            // Day 2, start is 15, end is 25, reset at 50
            assert_eq!(
                deltas.next().unwrap()[1].value().unwrap(),
                Some(25. + 50. - 15.)
            );
            assert!(deltas.next().is_none());
        });
    }

    // The following tests verify function-call vs arrow-operator parity
    // for each accessor against the two-row table from make_test_table.
    #[pg_test]
    fn irate_left_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    irate_left(counter_agg(ts, val)), \
                    counter_agg(ts, val) -> irate_left() \
                    FROM test",
                    f64
                ),
                0.16666666666666666,
            );
        });
    }

    #[pg_test]
    fn irate_right_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    irate_right(counter_agg(ts, val)), \
                    counter_agg(ts, val) -> irate_right() \
                    FROM test",
                    f64
                ),
                0.16666666666666666,
            );
        });
    }

    #[pg_test]
    fn idelta_left_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    idelta_left(counter_agg(ts, val)), \
                    counter_agg(ts, val) -> idelta_left() \
                    FROM test",
                    f64
                ),
                10.0,
            );
        });
    }

    #[pg_test]
    fn idelta_right_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    idelta_right(counter_agg(ts, val)), \
                    counter_agg(ts, val) -> idelta_right() \
                    FROM test",
                    f64
                ),
                10.0,
            );
        });
    }

    #[pg_test]
    fn num_resets_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    num_resets(counter_agg(ts, val))::float, \
                    (counter_agg(ts, val) -> num_resets())::float \
                    FROM test",
                    f64
                ),
                0.0,
            );
        });
    }

    #[pg_test]
    fn first_and_last_val() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_one!(
                    client,
                    "SELECT \
                    first_val(counter_agg(ts, val)) \
                    FROM test",
                    f64
                ),
                10.0,
            );
            assert_relative_eq!(
                select_one!(
                    client,
                    "SELECT \
                    last_val(counter_agg(ts, val)) \
                    FROM test",
                    f64
                ),
                20.0,
            );
        });
    }

    #[pg_test]
    fn first_and_last_val_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    first_val(counter_agg(ts, val)), \
                    counter_agg(ts, val) -> first_val() \
                    FROM test",
                    f64
                ),
                10.0,
            );
            assert_relative_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    last_val(counter_agg(ts, val)), \
                    counter_agg(ts, val) -> last_val() \
                    FROM test",
                    f64
                ),
                20.0,
            );
        });
    }

    #[pg_test]
    fn first_and_last_time() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
            assert_eq!(
                select_one!(
                    client,
                    "SELECT \
                    first_time(counter_agg(ts, val))::text \
                    FROM test",
                    &str
                ),
                "2020-01-01 00:00:00+00",
            );
            assert_eq!(
                select_one!(
                    client,
                    "SELECT \
                    last_time(counter_agg(ts, val))::text \
                    FROM test",
                    &str
                ),
                "2020-01-01 00:01:00+00",
            );
        });
    }

    #[pg_test]
    fn first_and_last_time_arrow_match() {
        Spi::connect_mut(|client| {
            make_test_table(client, "test");
            client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
            assert_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    first_time(counter_agg(ts, val))::text, \
                    (counter_agg(ts, val) -> first_time())::text \
                    FROM test",
                    &str
                ),
                "2020-01-01 00:00:00+00",
            );
            assert_eq!(
                select_and_check_one!(
                    client,
                    "SELECT \
                    last_time(counter_agg(ts, val))::text, \
                    (counter_agg(ts, val) -> last_time())::text \
                    FROM test",
                    &str
                ),
                "2020-01-01 00:01:00+00",
            );
        });
    }

    // #[pg_test]
    // fn test_combine_aggregate(){
    //     Spi::connect_mut(|client| {
    //     });
    // }
}

// Shared fixture helpers for the tests above: each creates the `test` table
// and seeds it with a specific counter shape.
#[cfg(any(test, feature = "pg_test"))]
pub(crate) mod testing {
    // Counter that goes down once (30 -> 10): one reset.
    pub fn decrease(client: &mut pgrx::spi::SpiClient) {
        client
            .update(
                "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                None,
                &[],
            )
            .unwrap();
        client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
        client
            .update(
                r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 30.0), ('2020-01-01 00:07:00+00', 10.0)"#,
                None,
                &[],
            )
            .unwrap();
    }

    // Counter that only goes up (10 -> 30): no reset.
    pub fn increase(client: &mut pgrx::spi::SpiClient) {
        client
            .update(
                "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                None,
                &[],
            )
            .unwrap();
        client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
        client
            .update(
                r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 10.0), ('2020-01-01 00:07:00+00', 30.0)"#,
                None,
                &[],
            )
            .unwrap();
    }

    // Counter 30 -> 10 -> 30: one reset, ending at the starting value.
    pub fn decrease_then_increase_to_same_value(client: &mut pgrx::spi::SpiClient) {
        client
            .update(
                "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                None,
                &[],
            )
            .unwrap();
        client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
        client
            .update(
                r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 30.0), ('2020-01-01 00:07:00+00', 10.0), ('2020-01-01 00:08:00+00', 30.0)"#,
                None,
                &[],
            )
            .unwrap();
    }

    // Counter 10 -> 30 -> 10: rises then resets back to the starting value.
    pub fn increase_then_decrease_to_same_value(client: &mut pgrx::spi::SpiClient) {
        client
            .update(
                "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                None,
                &[],
            )
            .unwrap();
        client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap();
        client
            .update(
                r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 10.0), ('2020-01-01 00:07:00+00', 30.0), ('2020-01-01 00:08:00+00', 10.0)"#,
                None,
                &[],
            )
            .unwrap();
    }

    // Generic two-row fixture (10.0 at 00:00, 20.0 at 00:01) used by most
    // accessor tests.
    pub fn make_test_table(client: &mut pgrx::spi::SpiClient, name: &str) {
        client
            .update(
                &format!("CREATE TABLE {name}(ts timestamptz, val DOUBLE PRECISION)"),
                None,
                &[],
            )
            .unwrap();
        client.update(
            &format!("INSERT INTO {name} VALUES('2020-01-01 00:00:00+00', 10.0), ('2020-01-01 00:01:00+00', 20.0)"),
            None,
            &[]
        ).unwrap();
    }
}


================================================
FILE: extension/src/countminsketch.rs
================================================
use pgrx::*;

use aggregate_builder::aggregate;
use countminsketch::{CountMinHashFn, CountMinSketch as CountMinSketchInternal};

use crate::{
    flatten,
    palloc::{Inner, Internal},
    pg_type,
    raw::bytea,
    ron_inout_funcs,
};

#[pg_schema]
pub mod toolkit_experimental {
    use super::*;

    // Flat-serialized on-disk representation of a count-min sketch:
    // a width x depth matrix of counters stored row-major.
    pg_type! {
        #[derive(Debug)]
        struct CountMinSketch<'input> {
            width: u32,
            depth: u32,
            counters: [i64; self.width * self.depth],
        }
    }

    impl CountMinSketch<'_> {
        // Build the flat-serialized form from raw dimensions and counters.
        // NOTE(review): several generic argument lists in this impl read as
        // bare `Vec`/`Option`/`Vec>` — angle-bracket contents appear to have
        // been lost in extraction; code is reproduced as-is.
        fn new(width: u32, depth: u32, counters: Vec) -> Self {
            let counters_arr = counters.into();
            unsafe {
                flatten!(CountMinSketch {
                    width,
                    depth,
                    counters: counters_arr,
                })
            }
        }

        // Reconstruct the in-memory sketch: one hash function per row
        // (keyed 1..=depth) and the counters split back into rows.
        pub fn to_internal_countminsketch(&self) -> CountMinSketchInternal {
            let depth: u64 = self.depth.into();
            let hashfuncs = (1..=depth).map(CountMinHashFn::with_key).collect();
            let mut counters: Vec> = Vec::with_capacity(self.depth as usize);
            let row_width = self.width as usize;
            for row in 0..self.depth {
                let row_start = (row * self.width) as usize;
                counters.push(
                    self.counters
                        .iter()
                        .skip(row_start)
                        .take(row_width)
                        .collect(),
                );
            }
            CountMinSketchInternal::new(
                self.width as usize,
                self.depth as usize,
                hashfuncs,
                counters,
            )
        }

        // Flatten an in-memory sketch back into the serialized row-major form.
        pub fn from_internal_countminsketch(sketch: &mut CountMinSketchInternal) -> Self {
            CountMinSketch::new(
                sketch.width().try_into().unwrap(),
                sketch.depth().try_into().unwrap(),
                sketch.counters().iter().flatten().cloned().collect(),
            )
        }
    }

    ron_inout_funcs!(CountMinSketch<'input>);
}

use toolkit_experimental::CountMinSketch;

// SQL aggregate `toolkit_experimental.count_min_sketch(text, float, float)`.
#[aggregate]
impl toolkit_experimental::count_min_sketch {
    type State = CountMinSketchInternal;

    // Lazily create the sketch from (error, probability) on first non-NULL
    // value; NULL values leave the state untouched.
    fn transition(
        state: Option,
        #[sql_type("text")] value: Option,
        #[sql_type("float")] error: f64,
        #[sql_type("float")] probability: f64,
    ) -> Option {
        let value = match value {
            None => return state,
            Some(value) => value,
        };
        let mut state = match state {
            None => CountMinSketchInternal::with_prob(error, probability),
            Some(state) => state,
        };
        state.add_value(value);
        Some(state)
    }

    // Convert the transition state to the flat-serialized SQL type.
    fn finally(state: Option<&mut State>) -> Option> {
        state.map(CountMinSketch::from_internal_countminsketch)
    }

    const PARALLEL_SAFE: bool = true;

    fn serialize(state: &mut State) -> bytea {
        crate::do_serialize!(state)
    }

    fn deserialize(bytes: bytea) -> State {
        crate::do_deserialize!(bytes, State)
    }

    // Merge partial states from parallel workers; a lone state passes through.
    fn combine(state1: Option<&State>, state2: Option<&State>) -> Option {
        match (state1, state2) {
            (None, None) => None,
            (None, Some(only)) | (Some(only), None) => Some(only.clone()),
            (Some(a), Some(b)) => {
                let (mut a, b) = (a.clone(), b.clone());
                a.combine(b);
                Some(a)
            }
        }
    }
}

// Estimated occurrence count of `item` in the sketched input (upper-biased).
#[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")]
pub fn approx_count<'a>(item: String, aggregate: Option>) -> Option {
    aggregate.map(|sketch| CountMinSketch::to_internal_countminsketch(&sketch).estimate(item))
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use super::*;
    use pgrx_macros::pg_test;

    // End-to-end: build a sketch over 150 rows and check the estimates stay
    // within the configured error margin.
    #[pg_test]
    fn test_countminsketch() {
        Spi::connect_mut(|client| {
            client
                .update("CREATE TABLE test (data TEXT)", None, &[])
                .unwrap();
            client.update("INSERT INTO test SELECT generate_series(1, 100)::TEXT UNION ALL SELECT generate_series(1, 50)::TEXT", None, &[]).unwrap();

            let sanity = client
                .update("SELECT COUNT(*) FROM test", None, &[])
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            assert_eq!(Some(150), sanity);

            client
                .update(
                    "CREATE VIEW sketch AS \
                    SELECT toolkit_experimental.count_min_sketch(data, 0.01, 0.01) \
                    FROM test",
                    None,
                    &[],
                )
                .unwrap();

            let sanity = client
                .update("SELECT COUNT(*) FROM sketch", None, &[])
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            assert!(sanity.unwrap_or(0) > 0);

            let (col1, col2, col3) = client
                .update(
                    "SELECT \
                    toolkit_experimental.approx_count('1', count_min_sketch), \
                    toolkit_experimental.approx_count('51', count_min_sketch), \
                    toolkit_experimental.approx_count('101', count_min_sketch) \
                    FROM sketch",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_three::()
                .unwrap();

            // 0.01 => error param to the sketch, 150 => number of items added to the sketch
            let err_margin = 0.01 * 150.0;
            let items = [(col1, 2), (col2, 1), (col3, 0)];
            for (approx_count, expected) in items {
                // CM sketches never under-count: estimate >= true count.
                let approx_count = approx_count.unwrap();
                assert!(expected <= approx_count);
                let upper_bound = err_margin + expected as f64;
                let approx_count = approx_count as f64;
                assert!(approx_count < upper_bound);
            }
        });
    }
    // Aggregating over a UNION should be equivalent to combining partial
    // sketches — the estimate for '1' must reflect both halves.
    #[pg_test]
    fn test_countminsketch_combine() {
        Spi::connect_mut(|client| {
            let combined = client
                .update(
                    "SELECT toolkit_experimental.approx_count('1', toolkit_experimental.count_min_sketch(v::text, 0.01, 0.01)) FROM (SELECT * FROM generate_series(1, 100) v \
                    UNION ALL \
                    SELECT * FROM generate_series(1, 100)) u(v)",
                    None,
                    &[],
                )
                .unwrap().first()
                .get_one::().unwrap();

            let expected = 2;
            // 0.01 => error param to the sketch, 200 => number of items added to the sketch
            let err_margin = 0.01 * 200.0;
            let approx_count = combined.unwrap();
            assert!(expected <= approx_count);
            let upper_bound = err_margin + expected as f64;
            let approx_count = approx_count as f64;
            assert!(approx_count < upper_bound);
        });
    }

    // Golden-text test of the sketch's RON serialization.
    #[pg_test]
    fn countminsketch_io_test() {
        Spi::connect_mut(|client| {
            client
                .update("CREATE TABLE io_test (value TEXT)", None, &[])
                .unwrap();
            client.update("INSERT INTO io_test VALUES ('lorem'), ('ipsum'), ('dolor'), ('sit'), ('amet'), ('consectetur'), ('adipiscing'), ('elit')", None, &[]).unwrap();

            let sketch = client
                .update(
                    "SELECT toolkit_experimental.count_min_sketch(value, 0.5, 0.01)::text FROM io_test",
                    None,
                    &[]
                )
                .unwrap().first()
                .get_one::().unwrap();

            let expected = "(\
            version:1,\
            width:6,\
            depth:5,\
            counters:[\
            1,2,2,1,1,1,\
            0,0,2,3,1,2,\
            1,0,3,0,4,0,\
            1,3,2,0,1,1,\
            0,0,4,3,0,1\
            ]\
            )";
            assert_eq!(sketch, Some(expected.into()));
        });
    }

    // NULL input values must not create a sketch (strict-like behavior).
    #[pg_test]
    fn test_cms_null_input_yields_null_output() {
        Spi::connect_mut(|client| {
            let output = client
                .update(
                    "SELECT toolkit_experimental.count_min_sketch(NULL::TEXT, 0.1, 0.1)::TEXT",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            assert_eq!(output, None)
        })
    }

    // approx_count over a NULL sketch must return NULL.
    #[pg_test]
    fn test_approx_count_null_input_yields_null_output() {
        Spi::connect_mut(|client| {
            let output = client
                .update(
                    "SELECT toolkit_experimental.approx_count('1'::text, NULL::toolkit_experimental.countminsketch)",
                    None,
                    &[]
                )
                .unwrap().first()
                .get_one::().unwrap();
            assert_eq!(output, None)
        })
    }
}


================================================
FILE: extension/src/datum_utils.rs
================================================
use std::{
    fmt,
    hash::{BuildHasher, Hasher},
    mem::size_of,
    slice,
};

use serde::{
    de::{SeqAccess, Visitor},
    ser::SerializeSeq,
    Deserialize, Serialize,
};

use pg_sys::{Datum, Oid};
use pgrx::*;

use crate::serialization::{PgCollationId, ShortTypeId};

// Deep-copy a datum so the result owns its memory: pass-by-value datums are
// returned unchanged, fixed-length by-reference datums are palloc-copied,
// and varlena datums go through pg_detoast_datum_copy.
pub(crate) unsafe fn deep_copy_datum(datum: Datum, typoid: Oid) -> Datum {
    let tentry = pg_sys::lookup_type_cache(typoid, 0_i32);

    if (*tentry).typbyval {
        datum
    } else if (*tentry).typlen > 0 {
        // only varlena's can be toasted, manually copy anything with len >0
        let size = (*tentry).typlen as usize;
        let copy = pg_sys::palloc0(size);
        std::ptr::copy(datum.cast_mut_ptr(), copy as *mut u8, size);
        pg_sys::Datum::from(copy)
    } else {
        pg_sys::Datum::from(pg_sys::pg_detoast_datum_copy(datum.cast_mut_ptr()))
    }
}

// If datum is an alloced type, free the associated memory
pub(crate) unsafe fn free_datum(datum: Datum, typoid: Oid) {
    let tentry = pg_sys::lookup_type_cache(typoid, 0_i32);
    if !(*tentry).typbyval {
        pg_sys::pfree(datum.cast_mut_ptr())
    }
}

// TODO: is there a better place for this?
// Note that this requires a reference time to deal with variable length intervals (days or months)
pub fn ts_interval_sum_to_ms(
    ref_time: &crate::raw::TimestampTz,
    interval: &crate::raw::Interval,
) -> i64 {
    // Call PostgreSQL's own timestamptz + interval operator so month/day
    // arithmetic matches server semantics exactly.
    unsafe extern "C-unwind" {
        #[allow(improper_ctypes)]
        fn timestamptz_pl_interval(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
    }
    let bound = unsafe {
        pg_sys::DirectFunctionCall2Coll(
            Some(timestamptz_pl_interval),
            pg_sys::InvalidOid,
            ref_time.0,
            interval.0,
        )
    };
    bound.value() as i64
}

// Length of `interval` relative to `ref_time`, as (ref_time + interval) - ref_time.
pub fn interval_to_ms(ref_time: &crate::raw::TimestampTz, interval: &crate::raw::Interval) -> i64 {
    ts_interval_sum_to_ms(ref_time, interval) - ref_time.0.value() as i64
}

// Wraps the type's output function so datums of that type can be rendered
// as text repeatedly without re-resolving the function.
pub struct TextSerializableDatumWriter {
    flinfo: pg_sys::FmgrInfo,
}

impl TextSerializableDatumWriter {
    pub fn from_oid(typoid: Oid) -> Self {
        let mut type_output = pg_sys::Oid::INVALID;
        let mut typ_is_varlena = false;
        let mut flinfo = unsafe { std::mem::MaybeUninit::zeroed().assume_init() };
        unsafe {
            pg_sys::getTypeOutputInfo(typoid, &mut type_output, &mut typ_is_varlena);
            pg_sys::fmgr_info(type_output, &mut flinfo);
        }
        TextSerializableDatumWriter { flinfo }
    }

    pub fn make_serializable(&mut self, datum: Datum) -> TextSerializeableDatum {
        TextSerializeableDatum(datum, &mut self.flinfo)
    }
}

// Counterpart of the writer: caches the type's input function (and its IO
// parameter) to parse datums back from their text form.
pub struct DatumFromSerializedTextReader {
    flinfo: pg_sys::FmgrInfo,
    typ_io_param: pg_sys::Oid,
}

impl DatumFromSerializedTextReader {
    pub fn from_oid(typoid: Oid) -> Self {
        let mut type_input = pg_sys::Oid::INVALID;
        let mut typ_io_param = pg_sys::oids::Oid::INVALID;
        let mut flinfo = unsafe { std::mem::MaybeUninit::zeroed().assume_init() };
        unsafe {
            pg_sys::getTypeInputInfo(typoid, &mut type_input, &mut typ_io_param);
            pg_sys::fmgr_info(type_input, &mut flinfo);
        }
        DatumFromSerializedTextReader {
            flinfo,
            typ_io_param,
        }
    }

    // NOTE(review): this function continues beyond the end of this hunk;
    // only the prefix is visible here.
    pub fn read_datum(&mut self, datum_str: &str) -> Datum {
        let cstr = std::ffi::CString::new(datum_str).unwrap(); // TODO: error handling
        let cstr_ptr = cstr.as_ptr() as *mut std::os::raw::c_char;
        unsafe {
pg_sys::InputFunctionCall(&mut self.flinfo, cstr_ptr, self.typ_io_param, -1) } } } pub struct TextSerializeableDatum(Datum, *mut pg_sys::FmgrInfo); impl Serialize for TextSerializeableDatum { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { let chars = unsafe { pg_sys::OutputFunctionCall(self.1, self.0) }; let cstr = unsafe { std::ffi::CStr::from_ptr(chars) }; serializer.serialize_str(cstr.to_str().unwrap()) } } pub(crate) struct DatumHashBuilder { pub info: pg_sys::FunctionCallInfo, pub type_id: pg_sys::Oid, pub collation: pg_sys::Oid, } impl DatumHashBuilder { pub(crate) unsafe fn from_type_id(type_id: pg_sys::Oid, collation: Option) -> Self { let entry = pg_sys::lookup_type_cache(type_id, pg_sys::TYPECACHE_HASH_EXTENDED_PROC_FINFO as _); Self::from_type_cache_entry(entry, collation) } pub(crate) unsafe fn from_type_cache_entry( tentry: *const pg_sys::TypeCacheEntry, collation: Option, ) -> Self { let flinfo = if (*tentry).hash_extended_proc_finfo.fn_addr.is_some() { &(*tentry).hash_extended_proc_finfo } else { pgrx::error!("no hash function"); }; // 1 argument for the key, 1 argument for the seed let size = size_of::() + size_of::() * 2; let info = pg_sys::palloc0(size) as pg_sys::FunctionCallInfo; (*info).flinfo = flinfo as *const pg_sys::FmgrInfo as *mut pg_sys::FmgrInfo; (*info).context = std::ptr::null_mut(); (*info).resultinfo = std::ptr::null_mut(); (*info).fncollation = (*tentry).typcollation; (*info).isnull = false; (*info).nargs = 1; let collation = match collation { Some(collation) => collation, None => (*tentry).typcollation, }; Self { info, type_id: (*tentry).type_id, collation, } } } impl Clone for DatumHashBuilder { fn clone(&self) -> Self { unsafe { DatumHashBuilder::from_type_id(self.type_id, Some(self.collation)) } } } impl BuildHasher for DatumHashBuilder { type Hasher = DatumHashBuilder; fn build_hasher(&self) -> Self::Hasher { Self { info: self.info, type_id: self.type_id, collation: self.collation, } } } impl Hasher 
for DatumHashBuilder { fn finish(&self) -> u64 { //FIXME ehhh, this is wildly unsafe, should at least have a separate hash // buffer for each, probably should have separate args let value = unsafe { let value = (*(*self.info).flinfo).fn_addr.unwrap()(self.info); (*self.info).args.as_mut_slice(1)[0] = pg_sys::NullableDatum { value: Datum::from(0_usize), isnull: true, }; (*self.info).isnull = false; //FIXME 32bit vs 64 bit get value from datum on 32b arch value }; value.value() as u64 } fn write(&mut self, bytes: &[u8]) { if bytes.len() != size_of::() { panic!("invalid datum hash") } let mut b = [0; size_of::()]; b[..size_of::()].clone_from_slice(&bytes[..size_of::()]); self.write_usize(usize::from_ne_bytes(b)) } fn write_usize(&mut self, i: usize) { unsafe { (*self.info).args.as_mut_slice(1)[0] = pg_sys::NullableDatum { value: Datum::from(i), isnull: false, }; (*self.info).isnull = false; } } } impl PartialEq for DatumHashBuilder { fn eq(&self, other: &Self) -> bool { self.type_id.eq(&other.type_id) } } impl Eq for DatumHashBuilder {} impl Serialize for DatumHashBuilder { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { let collation = if self.collation == pg_sys::oids::Oid::INVALID { None } else { Some(PgCollationId(self.collation)) }; (ShortTypeId(self.type_id), collation).serialize(serializer) } } impl<'de> Deserialize<'de> for DatumHashBuilder { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let (type_id, collation) = <(ShortTypeId, Option)>::deserialize(deserializer)?; //FIXME no collation? 
let deserialized = unsafe { Self::from_type_id(type_id.0, collation.map(|c| c.0)) }; Ok(deserialized) } } #[inline] fn div_round_up(numerator: usize, divisor: usize) -> usize { numerator.div_ceil(divisor) } #[inline] fn round_to_multiple(value: usize, multiple: usize) -> usize { div_round_up(value, multiple) * multiple } #[inline] fn padded_va_len(ptr: *const pg_sys::varlena) -> usize { unsafe { round_to_multiple(varsize_any(ptr), 8) } } flat_serialize_macro::flat_serialize! { #[derive(Debug)] struct DatumStore<'input> { type_oid: crate::serialization::ShortTypeId, data_len: u32, // XXX this must be aligned to 8-bytes to ensure the stored data is correctly aligned data: [u8; self.data_len], } } impl<'a> Serialize for DatumStore<'a> { // TODO currently this always serializes inner data as text. When we start // working on more-efficient network serialization, or we start using this // in a transition state, we should use the binary format if we don't need // human-readable output. fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { let mut writer = TextSerializableDatumWriter::from_oid(self.type_oid.0); let count = self.iter().count(); let mut seq = serializer.serialize_seq(Some(count + 1))?; seq.serialize_element(&self.type_oid.0.to_u32())?; for element in self.iter() { seq.serialize_element(&writer.make_serializable(element))?; } seq.end() } } impl<'a, 'de> Deserialize<'de> for DatumStore<'a> { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { struct DatumStoreVisitor<'a>(std::marker::PhantomData>); impl<'de, 'a> Visitor<'de> for DatumStoreVisitor<'a> { type Value = DatumStore<'a>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a sequence encoding a DatumStore object") } fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { let oid = Oid::from(seq.next_element::().unwrap().unwrap()); // TODO: error handling // TODO separate human-readable and binary forms 
let mut reader = DatumFromSerializedTextReader::from_oid(oid); let mut data = vec![]; while let Some(next) = seq.next_element::<&str>()? { data.push(reader.read_datum(next)); } Ok((oid, data).into()) } } deserializer.deserialize_seq(DatumStoreVisitor(std::marker::PhantomData)) } } impl From<(Oid, Vec)> for DatumStore<'_> { fn from(input: (Oid, Vec)) -> Self { let (oid, datums) = input; let (tlen, typbyval) = unsafe { let tentry = pg_sys::lookup_type_cache(oid, 0_i32); ((*tentry).typlen, (*tentry).typbyval) }; assert!(tlen.is_positive() || tlen == -1 || tlen == -2); if typbyval { // Datum by value // pad entries out to 8 byte aligned values...this may be a source of inefficiency let data_len = round_to_multiple(tlen as usize, 8) as u32 * datums.len() as u32; let mut data: Vec = vec![]; for datum in datums { data.extend_from_slice(&datum.value().to_ne_bytes()); } DatumStore { type_oid: oid.into(), data_len, data: data.into(), } } else if tlen == -1 { // Varlena let mut ptrs = Vec::new(); let mut total_data_bytes = 0; for datum in datums { unsafe { let ptr = pg_sys::pg_detoast_datum_packed(datum.cast_mut_ptr::()); let va_len = varsize_any(ptr); ptrs.push(ptr); total_data_bytes += round_to_multiple(va_len, 8); // Round up to 8 byte boundary } } let mut buffer = vec![0u8; total_data_bytes]; let mut target_byte = 0; for ptr in ptrs { unsafe { let va_len = varsize_any(ptr); std::ptr::copy( ptr as *const u8, std::ptr::addr_of_mut!(buffer[target_byte]), va_len, ); target_byte += round_to_multiple(va_len, 8); } } DatumStore { type_oid: oid.into(), data_len: total_data_bytes as u32, data: buffer.into(), } } else if tlen == -2 { // Null terminated string, should not be possible in this context panic!("Unexpected null-terminated string type encountered."); } else { // Fixed size reference // Round size to multiple of 8 bytes let len = round_to_multiple(tlen as usize, 8); let total_length = len * datums.len(); let mut buffer = vec![0u8; total_length]; for (i, datum) in 
datums.iter().enumerate() { unsafe { std::ptr::copy( datum.cast_mut_ptr(), std::ptr::addr_of_mut!(buffer[i * len]), tlen as usize, ) }; } DatumStore { type_oid: oid.into(), data_len: total_length as u32, data: buffer.into(), } } } } pub enum DatumStoreIterator<'a, 'b> { Value { iter: slice::Iter<'a, Datum>, }, Varlena { store: &'b DatumStore<'a>, next_offset: u32, }, FixedSize { store: &'b DatumStore<'a>, next_index: u32, datum_size: u32, }, } impl<'a, 'b> Iterator for DatumStoreIterator<'a, 'b> { type Item = Datum; fn next(&mut self) -> Option { match self { DatumStoreIterator::Value { iter } => iter.next().copied(), DatumStoreIterator::Varlena { store, next_offset } => { if *next_offset >= store.data_len { None } else { unsafe { let va = store.data.slice().as_ptr().offset(*next_offset as _); *next_offset += padded_va_len(va as *const _) as u32; Some(pg_sys::Datum::from(va)) } } } DatumStoreIterator::FixedSize { store, next_index, datum_size, } => { let idx = *next_index * *datum_size; if idx >= store.data_len { None } else { *next_index += 1; Some(pg_sys::Datum::from(unsafe { store.data.slice().as_ptr().offset(idx as _) })) } } } } } impl<'a> DatumStore<'a> { pub fn iter<'b>(&'b self) -> DatumStoreIterator<'a, 'b> { unsafe { let tentry = pg_sys::lookup_type_cache(self.type_oid.into(), 0_i32); if (*tentry).typbyval { // Datum by value DatumStoreIterator::Value { // SAFETY `data` is guaranteed to be 8-byte aligned, so it should be safe to use as a slice iter: std::slice::from_raw_parts( self.data.as_slice().as_ptr() as *const Datum, self.data_len as usize / 8, ) .iter(), } } else if (*tentry).typlen == -1 { // Varlena DatumStoreIterator::Varlena { store: self, next_offset: 0, } } else if (*tentry).typlen == -2 { // Null terminated string unreachable!() } else { // Fixed size reference assert!((*tentry).typlen.is_positive()); DatumStoreIterator::FixedSize { store: self, next_index: 0, datum_size: round_to_multiple((*tentry).typlen as usize, 8) as u32, } } } } pub fn 
into_anyelement_iter(self) -> impl Iterator + 'a { let oid: pg_sys::Oid = self.type_oid.into(); self.into_iter() .map(move |x| unsafe { AnyElement::from_polymorphic_datum(x, false, oid) }.unwrap()) } } // This is essentially the same as the DatumStoreIterator except that it takes ownership of the DatumStore, // there should be some way to efficiently merge these implementations pub enum DatumStoreIntoIterator<'a> { Value { store: DatumStore<'a>, next_idx: u32, }, Varlena { store: DatumStore<'a>, next_offset: u32, }, FixedSize { store: DatumStore<'a>, next_index: u32, datum_size: u32, }, } // iterate over the set of values in the datum store // will return pointers into the datum store if it's a by-ref type impl<'a> Iterator for DatumStoreIntoIterator<'a> { type Item = Datum; fn next(&mut self) -> Option { match self { DatumStoreIntoIterator::Value { store, next_idx } => { let idx = *next_idx as usize; let bound = store.data_len as usize / 8; if idx >= bound { None } else { // SAFETY `data` is guaranteed to be 8-byte aligned, so it is safe to use as a usize slice let dat = unsafe { std::slice::from_raw_parts( store.data.as_slice().as_ptr() as *const Datum, bound, )[idx] }; *next_idx += 1; Some(dat) } } DatumStoreIntoIterator::Varlena { store, next_offset } => { if *next_offset >= store.data_len { None } else { unsafe { let va = store.data.slice().as_ptr().offset(*next_offset as _); *next_offset += padded_va_len(va as *const _) as u32; Some(pg_sys::Datum::from(va)) } } } DatumStoreIntoIterator::FixedSize { store, next_index, datum_size, } => { let idx = *next_index * *datum_size; if idx >= store.data_len { None } else { *next_index += 1; Some(pg_sys::Datum::from(unsafe { store.data.slice().as_ptr().offset(idx as _) })) } } } } } impl<'a> IntoIterator for DatumStore<'a> { type Item = Datum; type IntoIter = DatumStoreIntoIterator<'a>; fn into_iter(self) -> Self::IntoIter { unsafe { let tentry = pg_sys::lookup_type_cache(self.type_oid.into(), 0_i32); if 
(*tentry).typbyval { // Datum by value DatumStoreIntoIterator::Value { store: self, next_idx: 0, } } else if (*tentry).typlen == -1 { // Varlena DatumStoreIntoIterator::Varlena { store: self, next_offset: 0, } } else if (*tentry).typlen == -2 { // Null terminated string unreachable!() } else { // Fixed size reference assert!((*tentry).typlen.is_positive()); DatumStoreIntoIterator::FixedSize { store: self, next_index: 0, datum_size: round_to_multiple((*tentry).typlen as usize, 8) as u32, } } } } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use crate::{build, palloc::Inner, pg_type, ron_inout_funcs}; use aggregate_builder::*; use pgrx_macros::pg_test; #[pg_schema] pub mod toolkit_experimental { use super::*; pg_type! { #[derive(Debug)] struct DatumStoreTester<'input> { datums: DatumStore<'input>, } } ron_inout_funcs!(DatumStoreTester<'input>); #[aggregate] impl toolkit_experimental::datum_test_agg { type State = (Oid, Vec); fn transition( state: Option, #[sql_type("AnyElement")] value: AnyElement, ) -> Option { match state { Some((oid, mut vector)) => { unsafe { vector.push(deep_copy_datum(value.datum(), oid)) }; Some((oid, vector)) } None => Some(( value.oid(), vec![unsafe { deep_copy_datum(value.datum(), value.oid()) }], )), } } fn finally(state: Option<&mut State>) -> Option> { state.map(|state| { build! 
{ DatumStoreTester { datums: DatumStore::from(std::mem::take(state)), } } }) } } } #[pg_test] fn test_value_datum_store() { Spi::connect_mut(|client| { let test = client.update("SELECT toolkit_experimental.datum_test_agg(r.data)::TEXT FROM (SELECT generate_series(10, 100, 10) as data) r", None, &[]) .unwrap().first() .get_one::().unwrap().unwrap(); let expected = "(version:1,datums:[23,\"10\",\"20\",\"30\",\"40\",\"50\",\"60\",\"70\",\"80\",\"90\",\"100\"])"; assert_eq!(test, expected); }); } #[pg_test] fn test_varlena_datum_store() { Spi::connect_mut(|client| { let test = client.update("SELECT toolkit_experimental.datum_test_agg(r.data)::TEXT FROM (SELECT generate_series(10, 100, 10)::TEXT as data) r", None, &[]) .unwrap().first() .get_one::().unwrap().unwrap(); let expected = "(version:1,datums:[25,\"10\",\"20\",\"30\",\"40\",\"50\",\"60\",\"70\",\"80\",\"90\",\"100\"])"; assert_eq!(test, expected); }); } #[pg_test] fn test_byref_datum_store() { Spi::connect_mut(|client| { let test = client.update("SELECT toolkit_experimental.datum_test_agg(r.data)::TEXT FROM (SELECT (generate_series(10, 100, 10)::TEXT || ' seconds')::INTERVAL as data) r", None, &[]) .unwrap().first() .get_one::().unwrap().unwrap(); let expected = "(version:1,datums:[1186,\"00:00:10\",\"00:00:20\",\"00:00:30\",\"00:00:40\",\"00:00:50\",\"00:01:00\",\"00:01:10\",\"00:01:20\",\"00:01:30\",\"00:01:40\"])"; assert_eq!(test, expected); }); } } ================================================ FILE: extension/src/duration.rs ================================================ //! Utilities for working with durations. Parsing of duration units is intended to match how //! PostgreSQL parses duration units. Currently units longer than an hour are unsupported since //! the length of days varies when in a timezone with daylight savings time. 
use core::fmt::{self, Formatter}; // Canonical PostgreSQL units: https://github.com/postgres/postgres/blob/b76fb6c2a99eb7d49f96e56599fef1ffc1c134c9/src/include/utils/datetime.h#L48-L60 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum DurationUnit { // units should be ordered smallest -> largest Microsec, Millisec, Second, Minute, Hour, } impl DurationUnit { pub fn microseconds(self) -> u32 { match self { Self::Microsec => 1, Self::Millisec => 1000, Self::Second => 1_000_000, Self::Minute => 60_000_000, Self::Hour => 3_600_000_000, } } /// Convert `amount` of a unit to another unit. pub fn convert_unit(self, amount: f64, to: Self) -> f64 { let microseconds = amount * (self.microseconds() as f64); microseconds / (to.microseconds() as f64) } /// Tries to get a duration unit from a string, returning `None` if no known unit matched. pub fn from_str(s: &str) -> Option { // Aliases for canonical units: https://github.com/postgres/postgres/blob/b76fb6c2a99eb7d49f96e56599fef1ffc1c134c9/src/backend/utils/adt/datetime.c#L187-L247 match s.to_lowercase().as_str() { "usecond" | "microsecond" | "microseconds" | "microsecon" | "us" | "usec" | "useconds" | "usecs" => Some(Self::Microsec), "msecond" | "millisecond" | "milliseconds" | "millisecon" | "ms" | "msec" | "mseconds" | "msecs" => Some(Self::Millisec), "second" | "s" | "sec" | "seconds" | "secs" => Some(Self::Second), "minute" | "m" | "min" | "mins" | "minutes" => Some(Self::Minute), "hour" | "hours" | "h" | "hr" | "hrs" => Some(Self::Hour), _ => None, } } } impl fmt::Display for DurationUnit { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { DurationUnit::Microsec => write!(f, "microsecond"), DurationUnit::Millisec => write!(f, "millisecond"), DurationUnit::Second => write!(f, "second"), DurationUnit::Minute => write!(f, "minute"), DurationUnit::Hour => write!(f, "hour"), } } } #[cfg(test)] mod test { use super::*; #[test] fn convert_unit() { let load_time_secs = 75.0; let load_time_mins = 
DurationUnit::convert_unit(DurationUnit::Second, load_time_secs, DurationUnit::Minute); assert_eq!(load_time_mins, 1.25); } #[test] fn parse_unit() { assert_eq!( DurationUnit::from_str("usecs"), Some(DurationUnit::Microsec) ); assert_eq!(DurationUnit::from_str("MINUTE"), Some(DurationUnit::Minute)); assert_eq!( DurationUnit::from_str("MiLlIsEcOn"), Some(DurationUnit::Millisec) ); assert_eq!(DurationUnit::from_str("pahar"), None); assert_eq!(DurationUnit::from_str(""), None); } } ================================================ FILE: extension/src/frequency.rs ================================================ //! Based on the paper: https://cs.ucsb.edu/sites/default/files/documents/2005-23.pdf use std::fmt; use pgrx::{ iter::{SetOfIterator, TableIterator}, *, }; use pg_sys::{Datum, Oid}; use serde::{ de::{SeqAccess, Visitor}, ser::SerializeSeq, Deserialize, Serialize, }; use crate::{ accessors::{ AccessorIntoValues, AccessorMaxFrequencyInt, AccessorMinFrequencyInt, AccessorTopNCount, AccessorTopn, }, aggregate_utils::{get_collation_or_default, in_aggregate_context}, build, datum_utils::{ deep_copy_datum, DatumFromSerializedTextReader, DatumHashBuilder, DatumStore, TextSerializableDatumWriter, }, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_any_element::{PgAnyElement, PgAnyElementHashMap}, pg_type, raw::{bytea, text}, ron_inout_funcs, }; use spfunc::zeta::zeta; use statrs::function::harmonic::gen_harmonic; // Helper functions for zeta distribution // Default s-value const DEFAULT_ZETA_SKEW: f64 = 1.1; // probability of the nth element of a zeta distribution fn zeta_eq_n(skew: f64, n: u64) -> f64 { 1.0 / zeta(skew) * (n as f64).powf(-skew) } // cumulative distribution <= n in a zeta distribution fn zeta_le_n(skew: f64, n: u64) -> f64 { gen_harmonic(n, skew) / zeta(skew) } struct SpaceSavingEntry { value: Datum, count: u64, overcount: u64, } impl SpaceSavingEntry { fn clone(&self, typoid: Oid) -> SpaceSavingEntry { SpaceSavingEntry { value: unsafe { 
deep_copy_datum(self.value, typoid) }, count: self.count, overcount: self.overcount, } } } pub struct SpaceSavingTransState { entries: Vec, indices: PgAnyElementHashMap, total_vals: u64, freq_param: f64, // This is the minimum frequency for a freq_agg or the skew for a mcv_agg topn: u32, // 0 for freq_agg, creation parameter for mcv_agg max_size: u32, // Maximum size for indices } impl Clone for SpaceSavingTransState { fn clone(&self) -> Self { let mut new_state = Self { entries: vec![], indices: PgAnyElementHashMap::with_hasher(self.indices.hasher().clone()), total_vals: self.total_vals, freq_param: self.freq_param, max_size: self.max_size, topn: self.topn, }; let typoid = self.type_oid(); for entry in &self.entries { new_state.entries.push(SpaceSavingEntry { value: unsafe { deep_copy_datum(entry.value, typoid) }, count: entry.count, overcount: entry.overcount, }) } new_state.update_all_map_indices(); new_state } } // SpaceSavingTransState is a little tricky to serialize due to needing the typ oid to serialize the Datums. // This sort of requirement doesn't play nicely with the serde framework, so as a workaround we simply // serialize the object as one big sequence. 
The serialized sequence should look like this: // total_vals as u64 // min_freq as f64 // max_idx as u32 // topn as u32 // indices.hasher as DatumHashBuilder // entries as repeated (str, u64, u64) tuples impl Serialize for SpaceSavingTransState { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { let mut seq = serializer.serialize_seq(Some(self.entries.len() + 5))?; seq.serialize_element(&self.total_vals)?; seq.serialize_element(&self.freq_param)?; seq.serialize_element(&self.max_size)?; seq.serialize_element(&self.topn)?; seq.serialize_element(&self.indices.hasher())?; // TODO JOSH use a writer that switches based on whether we want binary or not let mut writer = TextSerializableDatumWriter::from_oid(self.type_oid()); for entry in &self.entries { seq.serialize_element(&( writer.make_serializable(entry.value), entry.count, entry.overcount, ))?; } seq.end() } } impl<'de> Deserialize<'de> for SpaceSavingTransState { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { struct FrequencyTransStateVisitor(); impl<'de> Visitor<'de> for FrequencyTransStateVisitor { type Value = SpaceSavingTransState; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("a sequence encoding a FrequencyTransState object") } fn visit_seq(self, mut seq: A) -> Result where A: SeqAccess<'de>, { let total_vals = seq.next_element::()?.unwrap(); let min_freq = seq.next_element::()?.unwrap(); let max_size = seq.next_element::()?.unwrap(); let topn = seq.next_element::()?.unwrap(); let hasher = seq.next_element::()?.unwrap(); let mut state = SpaceSavingTransState { entries: vec![], indices: PgAnyElementHashMap::with_hasher(hasher), total_vals, freq_param: min_freq, max_size, topn, }; let typid = state.type_oid(); let mut reader = DatumFromSerializedTextReader::from_oid(typid); while let Some((datum_str, count, overcount)) = seq.next_element::<(&str, u64, u64)>()? 
{ let datum = reader.read_datum(datum_str); state.entries.push(SpaceSavingEntry { value: unsafe { deep_copy_datum(datum, typid) }, count, overcount, }); } state.update_all_map_indices(); Ok(state) } } deserializer.deserialize_seq(FrequencyTransStateVisitor()) } } impl SpaceSavingTransState { fn max_size_for_freq(min_freq: f64) -> u32 { (1. / min_freq) as u32 + 1 } fn freq_agg_from_type_id(min_freq: f64, typ: pg_sys::Oid, collation: Option) -> Self { SpaceSavingTransState { entries: vec![], indices: PgAnyElementHashMap::new(typ, collation), total_vals: 0, freq_param: min_freq, max_size: SpaceSavingTransState::max_size_for_freq(min_freq), topn: 0, } } fn mcv_agg_from_type_id( skew: f64, nval: u32, typ: pg_sys::Oid, collation: Option, ) -> Self { if nval == 0 { pgrx::error!("mcv aggregate requires an n value > 0") } if skew <= 1.0 { pgrx::error!("mcv aggregate requires a skew factor > 1.0") } let prob_eq_n = zeta_eq_n(skew, nval as u64); let prob_lt_n = zeta_le_n(skew, nval as u64 - 1); SpaceSavingTransState { entries: vec![], indices: PgAnyElementHashMap::new(typ, collation), total_vals: 0, freq_param: skew, max_size: nval - 1 + SpaceSavingTransState::max_size_for_freq(prob_eq_n / (1.0 - prob_lt_n)), topn: nval, } } fn ingest_aggregate_data( &mut self, val_count: u64, values: &DatumStore, counts: &[u64], overcounts: &[u64], ) { assert_eq!(self.total_vals, 0); // This should only be called on an empty aggregate self.total_vals = val_count; for (idx, datum) in values.iter().enumerate() { self.entries.push(SpaceSavingEntry { value: unsafe { deep_copy_datum(datum, self.indices.typoid()) }, count: counts[idx], overcount: overcounts[idx], }); self.indices .insert((self.entries[idx].value, self.type_oid()).into(), idx); } } fn ingest_aggregate_ints( &mut self, val_count: u64, values: &[i64], counts: &[u64], overcounts: &[u64], ) { assert_eq!(self.total_vals, 0); // This should only be called on an empty aggregate assert_eq!(self.type_oid(), pg_sys::INT8OID); self.total_vals 
        = val_count;
        // Rebuild the value -> entry-index map alongside the entries vector;
        // map keys pair each datum with this aggregate's element type oid.
        for (idx, val) in values.iter().enumerate() {
            self.entries.push(SpaceSavingEntry {
                value: Datum::from(*val),
                count: counts[idx],
                overcount: overcounts[idx],
            });
            self.indices
                .insert((self.entries[idx].value, self.type_oid()).into(), idx);
        }
    }

    /// Oid of the element type being aggregated (stored on the index map).
    fn type_oid(&self) -> Oid {
        self.indices.typoid()
    }

    /// Core Space-Saving update for a single incoming element:
    /// - value already tracked: bump its count and re-sort it leftward;
    /// - room left (fewer than `max_size` entries): insert with count 1;
    /// - otherwise: evict the minimum-count (last) entry, reusing its slot for
    ///   the new value, which inherits the evicted count as its `overcount`.
    fn add(&mut self, element: PgAnyElement) {
        self.total_vals += 1;
        if let Some(idx) = self.indices.get(&element) {
            let idx = *idx;
            self.entries[idx].count += 1;
            self.move_left(idx);
        } else if self.entries.len() < self.max_size as usize {
            let new_idx = self.entries.len();
            self.entries.push(SpaceSavingEntry {
                value: element.deep_copy_datum(),
                count: 1,
                overcount: 0,
            });
            // Important to create the indices entry using the datum in the local context
            self.indices.insert(
                (self.entries[new_idx].value, self.type_oid()).into(),
                new_idx,
            );
        } else {
            let new_value = element.deep_copy_datum();
            // TODO: might be more efficient to replace the lowest indexed tail value (count matching last) and not call move_up
            let typoid = self.type_oid();
            let entry = self.entries.last_mut().unwrap();
            self.indices.remove(&(entry.value, typoid).into());
            entry.value = new_value; // JOSH FIXME should we pfree() old value if by-ref?
            entry.overcount = entry.count;
            entry.count += 1;
            self.indices
                .insert((new_value, typoid).into(), self.entries.len() - 1);
            self.move_left(self.entries.len() - 1);
        }
    }

    // swap element i with an earlier element in the 'entries' vector to maintain decreasing order
    fn move_left(&mut self, i: usize) {
        let count = self.entries[i].count;
        let mut target = i;
        // Walk left past every entry with a strictly smaller count.
        while target > 0 && self.entries[target - 1].count < count {
            target -= 1;
        }
        if target != i {
            self.entries.swap(i, target);
            // Both swapped slots need their map positions refreshed.
            self.update_map_index(i);
            self.update_map_index(target);
        }
    }

    // Adds the 'indices' lookup entry for the value at 'entries' index i
    fn update_map_index(&mut self, i: usize) {
        let element_for_i = (self.entries[i].value, self.type_oid()).into();
        if let Some(entry) = self.indices.get_mut(&element_for_i) {
            *entry = i;
        } else {
            self.indices.insert(element_for_i, i);
        }
    }

    /// Rebuild every map entry from the entries vector (used after `combine`).
    fn update_all_map_indices(&mut self) {
        for i in 0..self.entries.len() {
            self.update_map_index(i);
        }
    }

    /// Merge two transition states into a new one holding the combined top
    /// entries, following the standard parallel-merge rule for Space-Saving
    /// summaries: counts for shared values add exactly; values missing from
    /// the other (full) summary are assumed to have at most its minimum count.
    fn combine(one: &SpaceSavingTransState, two: &SpaceSavingTransState) -> SpaceSavingTransState {
        // This takes an entry from a TransState, updates it with any state from the other TransState, and adds the result into the map
        fn new_entry(
            entry: &SpaceSavingEntry,
            other: &SpaceSavingTransState,
            map: &mut PgAnyElementHashMap<SpaceSavingEntry>,
        ) {
            let typoid = other.type_oid();
            let mut new_ent = entry.clone(typoid);
            let new_dat = (new_ent.value, typoid).into();
            match other.indices.get(&new_dat) {
                Some(&idx) => {
                    new_ent.count += other.entries[idx].count;
                    new_ent.overcount += other.entries[idx].overcount;
                }
                None => {
                    // If the entry value isn't present in the other state, we have to assume that it was recently bumped (unless the other state is not fully populated).
                    let min = if other.indices.len() < other.max_size as usize {
                        0
                    } else {
                        other.entries.last().unwrap().count
                    };
                    new_ent.count += min;
                    new_ent.overcount += min;
                }
            }
            map.insert(new_dat, new_ent);
        }

        let hasher = one.indices.hasher().clone();
        let mut temp = PgAnyElementHashMap::with_hasher(hasher);

        // First go through the first state, and add all entries (updated with other other state) to our temporary hashmap
        for entry in &one.entries {
            new_entry(entry, two, &mut temp);
        }

        // Next add in anything in the second state that isn't already in the map.
        // TODO JOSH does filter make this easier to read
        for entry in &two.entries {
            if !temp.contains_key(&(entry.value, one.type_oid()).into()) {
                new_entry(entry, one, &mut temp);
            }
        }

        // TODO: get this into_iter working without making temp.0 public
        let mut entries: Vec<SpaceSavingEntry> = temp.0.into_values().collect();
        entries.sort_by(|a, b| b.count.partial_cmp(&a.count).unwrap()); // swap a and b for descending
        // Keep only the top max_size entries for the merged summary.
        entries.truncate(one.max_size as usize);

        let mut result = SpaceSavingTransState {
            entries,
            indices: PgAnyElementHashMap::with_hasher(one.indices.hasher().clone()),
            total_vals: one.total_vals + two.total_vals,
            freq_param: one.freq_param,
            max_size: one.max_size,
            topn: one.topn,
        };
        result.update_all_map_indices();
        result
    }
}

// Stable on-disk / wire form of the aggregate for arbitrary element types.
pg_type! {
    #[derive(Debug)]
    struct SpaceSavingAggregate<'input> {
        type_oid: u32,
        num_values: u32,
        values_seen: u64,
        freq_param: f64,
        topn: u64, // bump this up to u64 to keep alignment
        counts: [u64; self.num_values], // JOSH TODO look at AoS instead of SoA at some point
        overcounts: [u64; self.num_values],
        datums: DatumStore<'input>,
    }
}

impl<'input> From<&SpaceSavingTransState> for SpaceSavingAggregate<'input> {
    /// Serialize a transition state into the flat aggregate form
    /// (parallel arrays of counts/overcounts plus a datum store).
    fn from(trans: &SpaceSavingTransState) -> Self {
        let mut values = Vec::new();
        let mut counts = Vec::new();
        let mut overcounts = Vec::new();
        for entry in &trans.entries {
            values.push(entry.value);
            counts.push(entry.count);
            overcounts.push(entry.overcount);
        }
        build! {
            SpaceSavingAggregate {
                type_oid: trans.type_oid().into(),
                num_values: trans.entries.len() as _,
                values_seen: trans.total_vals,
                freq_param: trans.freq_param,
                topn: trans.topn as u64,
                counts: counts.into(),
                overcounts: overcounts.into(),
                datums: DatumStore::from((trans.type_oid(), values)),
            }
        }
    }
}

impl<'input> From<(&SpaceSavingAggregate<'input>, &pg_sys::FunctionCallInfo)>
    for SpaceSavingTransState
{
    /// Reconstitute a transition state from the flat form; `topn == 0`
    /// distinguishes a frequency aggregate from a most-common-values one.
    fn from(data_in: (&SpaceSavingAggregate<'input>, &pg_sys::FunctionCallInfo)) -> Self {
        let (agg, fcinfo) = data_in;
        let collation = get_collation_or_default(*fcinfo);
        let mut trans = if agg.topn == 0 {
            SpaceSavingTransState::freq_agg_from_type_id(
                agg.freq_param,
                Oid::from(agg.type_oid),
                collation,
            )
        } else {
            SpaceSavingTransState::mcv_agg_from_type_id(
                agg.freq_param,
                agg.topn as u32,
                Oid::from(agg.type_oid),
                collation,
            )
        };
        trans.ingest_aggregate_data(
            agg.values_seen,
            &agg.datums,
            agg.counts.as_slice(),
            agg.overcounts.as_slice(),
        );
        trans
    }
}

ron_inout_funcs!(SpaceSavingAggregate<'input>);

// Specialized flat form for INT8 elements (values stored inline as i64).
pg_type! {
    #[derive(Debug)]
    struct SpaceSavingBigIntAggregate<'input> {
        num_values: u32,
        topn: u32,
        values_seen: u64,
        freq_param: f64,
        counts: [u64; self.num_values], // JOSH TODO look at AoS instead of SoA at some point
        overcounts: [u64; self.num_values],
        datums: [i64; self.num_values],
    }
}

impl<'input> From<&SpaceSavingTransState> for SpaceSavingBigIntAggregate<'input> {
    /// Serialize an INT8 transition state; panics if the state's element
    /// type is not INT8 (enforced by the assert).
    fn from(trans: &SpaceSavingTransState) -> Self {
        assert_eq!(trans.type_oid(), pg_sys::INT8OID);
        let mut values = Vec::new();
        let mut counts = Vec::new();
        let mut overcounts = Vec::new();
        for entry in &trans.entries {
            values.push(entry.value.value() as i64);
            counts.push(entry.count);
            overcounts.push(entry.overcount);
        }
        build! {
            SpaceSavingBigIntAggregate {
                num_values: trans.entries.len() as _,
                values_seen: trans.total_vals,
                freq_param: trans.freq_param,
                topn: trans.topn,
                counts: counts.into(),
                overcounts: overcounts.into(),
                datums: values.into(),
            }
        }
    }
}

impl<'input>
    From<(
        &SpaceSavingBigIntAggregate<'input>,
        &pg_sys::FunctionCallInfo,
    )> for SpaceSavingTransState
{
    /// Reconstitute an INT8 transition state from the flat form.
    fn from(
        data_in: (
            &SpaceSavingBigIntAggregate<'input>,
            &pg_sys::FunctionCallInfo,
        ),
    ) -> Self {
        let (agg, fcinfo) = data_in;
        let collation = get_collation_or_default(*fcinfo);
        let mut trans = if agg.topn == 0 {
            SpaceSavingTransState::freq_agg_from_type_id(agg.freq_param, pg_sys::INT8OID, collation)
        } else {
            SpaceSavingTransState::mcv_agg_from_type_id(
                agg.freq_param,
                agg.topn,
                pg_sys::INT8OID,
                collation,
            )
        };
        trans.ingest_aggregate_ints(
            agg.values_seen,
            agg.datums.as_slice(),
            agg.counts.as_slice(),
            agg.overcounts.as_slice(),
        );
        trans
    }
}

ron_inout_funcs!(SpaceSavingBigIntAggregate<'input>);

// Specialized flat form for TEXT elements (values kept in a DatumStore).
pg_type! {
    #[derive(Debug)]
    struct SpaceSavingTextAggregate<'input> {
        num_values: u32,
        topn: u32,
        values_seen: u64,
        freq_param: f64,
        counts: [u64; self.num_values], // JOSH TODO look at AoS instead of SoA at some point
        overcounts: [u64; self.num_values],
        datums: DatumStore<'input>,
    }
}

impl<'input> From<&SpaceSavingTransState> for SpaceSavingTextAggregate<'input> {
    /// Serialize a TEXT transition state; panics if the state's element
    /// type is not TEXT (enforced by the assert).
    fn from(trans: &SpaceSavingTransState) -> Self {
        assert_eq!(trans.type_oid(), pg_sys::TEXTOID);
        let mut values = Vec::new();
        let mut counts = Vec::new();
        let mut overcounts = Vec::new();
        for entry in &trans.entries {
            values.push(entry.value);
            counts.push(entry.count);
            overcounts.push(entry.overcount);
        }
        build! {
            SpaceSavingTextAggregate {
                num_values: trans.entries.len() as _,
                values_seen: trans.total_vals,
                freq_param: trans.freq_param,
                topn: trans.topn,
                counts: counts.into(),
                overcounts: overcounts.into(),
                datums: DatumStore::from((trans.type_oid(), values)),
            }
        }
    }
}

impl<'input> From<(&SpaceSavingTextAggregate<'input>, &pg_sys::FunctionCallInfo)>
    for SpaceSavingTransState
{
    /// Reconstitute a TEXT transition state from the flat form.
    fn from(data_in: (&SpaceSavingTextAggregate<'input>, &pg_sys::FunctionCallInfo)) -> Self {
        let (agg, fcinfo) = data_in;
        let collation = get_collation_or_default(*fcinfo);
        let mut trans = if agg.topn == 0 {
            SpaceSavingTransState::freq_agg_from_type_id(agg.freq_param, pg_sys::TEXTOID, collation)
        } else {
            SpaceSavingTransState::mcv_agg_from_type_id(
                agg.freq_param,
                agg.topn,
                pg_sys::TEXTOID,
                collation,
            )
        };
        trans.ingest_aggregate_data(
            agg.values_seen,
            &agg.datums,
            agg.counts.as_slice(),
            agg.overcounts.as_slice(),
        );
        trans
    }
}

ron_inout_funcs!(SpaceSavingTextAggregate<'input>);

/// Transition function for `mcv_agg(count, value)` over AnyElement;
/// delegates to the skew variant with the default zeta skew.
#[pg_extern(immutable, parallel_safe)]
pub fn mcv_agg_trans(
    state: Internal,
    n: i32,
    value: Option<AnyElement>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    mcv_agg_with_skew_trans(state, n, DEFAULT_ZETA_SKEW, value, fcinfo)
}

/// Transition function for `mcv_agg(count, value)` over INT8.
#[pg_extern(immutable, parallel_safe)]
pub fn mcv_agg_bigint_trans(
    state: Internal,
    n: i32,
    value: Option<i64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    mcv_agg_with_skew_bigint_trans(state, n, DEFAULT_ZETA_SKEW, value, fcinfo)
}

/// Transition function for `mcv_agg(count, value)` over TEXT.
#[pg_extern(immutable, parallel_safe)]
pub fn mcv_agg_text_trans(
    state: Internal,
    n: i32,
    value: Option<text>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    mcv_agg_with_skew_text_trans(state, n, DEFAULT_ZETA_SKEW, value, fcinfo)
}

/// Transition function for `mcv_agg(count, skew, value)` over AnyElement.
#[pg_extern(immutable, parallel_safe)]
pub fn mcv_agg_with_skew_trans(
    state: Internal,
    n: i32,
    skew: f64,
    value: Option<AnyElement>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    space_saving_trans(
        unsafe { state.to_inner() },
        value,
        fcinfo,
        |typ, collation| {
            SpaceSavingTransState::mcv_agg_from_type_id(skew, n as u32, typ, collation)
        },
    )
    .internal()
}
/// Transition function for `mcv_agg(count, skew, value)` over INT8.
/// Converts the i64 to an AnyElement datum before the shared transition.
#[pg_extern(immutable, parallel_safe)]
pub fn mcv_agg_with_skew_bigint_trans(
    state: Internal,
    n: i32,
    skew: f64,
    value: Option<i64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let value = match value {
        None => None,
        Some(val) => unsafe {
            AnyElement::from_polymorphic_datum(pg_sys::Datum::from(val), false, pg_sys::INT8OID)
        },
    };
    space_saving_trans(
        unsafe { state.to_inner() },
        value,
        fcinfo,
        |typ, collation| {
            SpaceSavingTransState::mcv_agg_from_type_id(skew, n as u32, typ, collation)
        },
    )
    .internal()
}

/// Transition function for `mcv_agg(count, skew, value)` over TEXT.
/// Detoasts (copying) the text datum before wrapping it as an AnyElement.
#[pg_extern(immutable, parallel_safe)]
pub fn mcv_agg_with_skew_text_trans(
    state: Internal,
    n: i32,
    skew: f64,
    value: Option<text>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let txt = value.map(|v| unsafe { pg_sys::pg_detoast_datum_copy(v.0.cast_mut_ptr()) });
    let value = match txt {
        None => None,
        Some(val) => unsafe {
            AnyElement::from_polymorphic_datum(pg_sys::Datum::from(val), false, pg_sys::TEXTOID)
        },
    };
    space_saving_trans(
        unsafe { state.to_inner() },
        value,
        fcinfo,
        |typ, collation| {
            SpaceSavingTransState::mcv_agg_from_type_id(skew, n as u32, typ, collation)
        },
    )
    .internal()
}

/// Transition function for `freq_agg(frequency, value)` over AnyElement.
/// Rejects frequencies outside the open interval (0.0, 1.0).
#[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)]
pub fn freq_agg_trans(
    state: Internal,
    freq: f64,
    value: Option<AnyElement>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    if freq <= 0. || freq >= 1.0 {
        pgrx::error!("frequency aggregate requires a frequency in the range (0.0, 1.0)")
    }
    space_saving_trans(
        unsafe { state.to_inner() },
        value,
        fcinfo,
        |typ, collation| SpaceSavingTransState::freq_agg_from_type_id(freq, typ, collation),
    )
    .internal()
}

/// Transition function for `freq_agg(frequency, value)` over INT8.
#[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)]
pub fn freq_agg_bigint_trans(
    state: Internal,
    freq: f64,
    value: Option<i64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let value = match value {
        None => None,
        Some(val) => unsafe {
            AnyElement::from_polymorphic_datum(pg_sys::Datum::from(val), false, pg_sys::INT8OID)
        },
    };
    freq_agg_trans(state, freq, value, fcinfo)
}

/// Transition function for `freq_agg(frequency, value)` over TEXT.
#[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)]
pub fn freq_agg_text_trans(
    state: Internal,
    freq: f64,
    value: Option<text>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let txt = value.map(|v| unsafe { pg_sys::pg_detoast_datum_copy(v.0.cast_mut_ptr()) });
    let value = match txt {
        None => None,
        Some(val) => unsafe {
            AnyElement::from_polymorphic_datum(pg_sys::Datum::from(val), false, pg_sys::TEXTOID)
        },
    };
    freq_agg_trans(state, freq, value, fcinfo)
}

/// Shared transition body for all space-saving aggregates: NULL inputs are
/// ignored; the first non-NULL input lazily creates the transition state
/// (via `make_trans_state`) inside the aggregate memory context.
pub fn space_saving_trans<F>(
    state: Option<Inner<SpaceSavingTransState>>,
    value: Option<AnyElement>,
    fcinfo: pg_sys::FunctionCallInfo,
    make_trans_state: F,
) -> Option<Inner<SpaceSavingTransState>>
where
    F: FnOnce(pg_sys::Oid, Option<Oid>) -> SpaceSavingTransState,
{
    unsafe {
        in_aggregate_context(fcinfo, || {
            let value = match value {
                None => return state,
                Some(value) => value,
            };
            let mut state = match state {
                None => {
                    // Element type and collation are taken from the first value seen.
                    let typ = value.oid();
                    let collation = get_collation_or_default(fcinfo);
                    make_trans_state(typ, collation).into()
                }
                Some(state) => state,
            };
            state.add(value.into());
            Some(state)
        })
    }
}

/// Transition function for `rollup(SpaceSavingAggregate)`.
#[pg_extern(immutable, parallel_safe)]
pub fn rollup_agg_trans<'input>(
    state: Internal,
    value: Option<SpaceSavingAggregate<'input>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let value = match value {
        None => return Some(state),
        Some(v) => v,
    };
    rollup_agg_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

/// Rollup inner: rebuild a transition state from the incoming aggregate and
/// combine it with the running state (if any).
pub fn rollup_agg_trans_inner(
    state: Option<Inner<SpaceSavingTransState>>,
    value: SpaceSavingAggregate,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<SpaceSavingTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let trans = (&value, &fcinfo).into();
            if let Some(state) = state {
                Some(SpaceSavingTransState::combine(&state, &trans).into())
            } else {
                Some(trans.into())
            }
        })
    }
}

/// Transition function for `rollup(SpaceSavingBigIntAggregate)`.
#[pg_extern(immutable, parallel_safe)]
pub fn rollup_agg_bigint_trans<'input>(
    state: Internal,
    value: Option<SpaceSavingBigIntAggregate<'input>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let value = match value {
        None => return Some(state),
        Some(v) => v,
    };
    rollup_agg_bigint_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

/// Rollup inner for the INT8 aggregate form.
pub fn rollup_agg_bigint_trans_inner(
    state: Option<Inner<SpaceSavingTransState>>,
    value: SpaceSavingBigIntAggregate,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<SpaceSavingTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let trans = (&value, &fcinfo).into();
            if let Some(state) = state {
                Some(SpaceSavingTransState::combine(&state, &trans).into())
            } else {
                Some(trans.into())
            }
        })
    }
}

/// Transition function for `rollup(SpaceSavingTextAggregate)`.
#[pg_extern(immutable, parallel_safe)]
pub fn rollup_agg_text_trans<'input>(
    state: Internal,
    value: Option<SpaceSavingTextAggregate<'input>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let value = match value {
        None => return Some(state),
        Some(v) => v,
    };
    rollup_agg_text_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

/// Rollup inner for the TEXT aggregate form.
pub fn rollup_agg_text_trans_inner(
    state: Option<Inner<SpaceSavingTransState>>,
    value: SpaceSavingTextAggregate,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<SpaceSavingTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let trans = (&value, &fcinfo).into();
            if let Some(state) = state {
                Some(SpaceSavingTransState::combine(&state, &trans).into())
            } else {
                Some(trans.into())
            }
        })
    }
}

/// Combine function shared by all the aggregates (parallel workers).
#[pg_extern(immutable, parallel_safe)]
pub fn space_saving_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { space_saving_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

/// Combine inner: merge two optional states; a lone state is cloned into the
/// aggregate memory context.
pub fn space_saving_combine_inner(
    a: Option<Inner<SpaceSavingTransState>>,
    b: Option<Inner<SpaceSavingTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<SpaceSavingTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (a, b) {
            (Some(a), Some(b)) => Some(SpaceSavingTransState::combine(&a, &b).into()),
            (Some(a), None) => Some(a.clone().into()),
            (None, Some(b)) => Some(b.clone().into()),
            (None, None) => None,
        })
    }
}

/// Final function producing the AnyElement aggregate form.
#[pg_extern(immutable, parallel_safe)]
fn space_saving_final(
    state: Internal,
    _fcinfo: pg_sys::FunctionCallInfo,
) -> Option<SpaceSavingAggregate<'static>> {
    let state: Option<&SpaceSavingTransState> = unsafe { state.get() };
    state.map(SpaceSavingAggregate::from)
}

/// Final function producing the INT8 aggregate form.
#[pg_extern(immutable, parallel_safe)]
fn space_saving_bigint_final(
    state: Internal,
    _fcinfo: pg_sys::FunctionCallInfo,
) -> Option<SpaceSavingBigIntAggregate<'static>> {
    let state: Option<&SpaceSavingTransState> = unsafe { state.get() };
    state.map(SpaceSavingBigIntAggregate::from)
}

/// Final function producing the TEXT aggregate form.
#[pg_extern(immutable, parallel_safe)]
fn space_saving_text_final(
    state: Internal,
    _fcinfo: pg_sys::FunctionCallInfo,
) -> Option<SpaceSavingTextAggregate<'static>> {
    let state: Option<&SpaceSavingTransState> = unsafe { state.get() };
    state.map(SpaceSavingTextAggregate::from)
}

/// serialfunc for the aggregates (used for parallel workers / partials).
#[pg_extern(immutable, parallel_safe)]
fn space_saving_serialize(state: Internal) -> bytea {
    let state: Inner<SpaceSavingTransState> = unsafe { state.to_inner().unwrap() };
    crate::do_serialize!(state)
}

/// deserialfunc counterpart to `space_saving_serialize`.
#[pg_extern(immutable, parallel_safe)]
pub fn space_saving_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    let i: SpaceSavingTransState = crate::do_deserialize!(bytes, SpaceSavingTransState);
    Inner::from(i).internal()
}

extension_sql!(
    "\n\
    CREATE AGGREGATE toolkit_experimental.raw_freq_agg(\n\
        frequency double precision, value AnyElement\n\
    ) (\n\
        sfunc = toolkit_experimental.freq_agg_trans,\n\
        stype = internal,\n\
        finalfunc = space_saving_final,\n\
        combinefunc = space_saving_combine,\n\
        serialfunc = space_saving_serialize,\n\
        deserialfunc = space_saving_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "freq_agg",
    requires = [
        freq_agg_trans,
        space_saving_final,
        space_saving_combine,
        space_saving_serialize,
        space_saving_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE toolkit_experimental.freq_agg(\n\
        frequency double precision, value INT8\n\
    ) (\n\
        sfunc = toolkit_experimental.freq_agg_bigint_trans,\n\
        stype = internal,\n\
        finalfunc = space_saving_bigint_final,\n\
        combinefunc = space_saving_combine,\n\
        serialfunc = space_saving_serialize,\n\
        deserialfunc = space_saving_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "freq_bigint_agg",
    requires = [
        freq_agg_bigint_trans,
        space_saving_bigint_final,
        space_saving_combine,
        space_saving_serialize,
        space_saving_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE toolkit_experimental.freq_agg(\n\
        frequency double precision, value TEXT\n\
    ) (\n\
        sfunc = toolkit_experimental.freq_agg_text_trans,\n\
        stype = internal,\n\
        finalfunc = space_saving_text_final,\n\
        combinefunc = space_saving_combine,\n\
        serialfunc = space_saving_serialize,\n\
        deserialfunc = space_saving_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "freq_text_agg",
    requires = [
        freq_agg_text_trans,
        space_saving_text_final,
        space_saving_combine,
        space_saving_serialize,
        space_saving_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE raw_mcv_agg(\n\
        count integer, value AnyElement\n\
    ) (\n\
        sfunc = mcv_agg_trans,\n\
        stype = internal,\n\
        finalfunc = space_saving_final,\n\
        combinefunc = space_saving_combine,\n\
        serialfunc = space_saving_serialize,\n\
        deserialfunc = space_saving_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "mcv_agg",
    requires = [
        mcv_agg_trans,
        space_saving_final,
        space_saving_combine,
        space_saving_serialize,
        space_saving_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE mcv_agg(\n\
        count integer, value INT8\n\
    ) (\n\
        sfunc = mcv_agg_bigint_trans,\n\
        stype = internal,\n\
        finalfunc = space_saving_bigint_final,\n\
        combinefunc = space_saving_combine,\n\
        serialfunc = space_saving_serialize,\n\
        deserialfunc = space_saving_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "mcv_bigint_agg",
    requires = [
        mcv_agg_bigint_trans,
        space_saving_bigint_final,
        space_saving_combine,
        space_saving_serialize,
        space_saving_deserialize
    ],
);
extension_sql!( "\n\ CREATE AGGREGATE mcv_agg(\n\ count integer, value TEXT\n\ ) (\n\ sfunc = mcv_agg_text_trans,\n\ stype = internal,\n\ finalfunc = space_saving_text_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "mcv_text_agg", requires = [ mcv_agg_text_trans, space_saving_text_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE raw_mcv_agg(\n\ count integer, skew double precision, value AnyElement\n\ ) (\n\ sfunc = mcv_agg_with_skew_trans,\n\ stype = internal,\n\ finalfunc = space_saving_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "mcv_agg_with_skew", requires = [ mcv_agg_with_skew_trans, space_saving_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE mcv_agg(\n\ count integer, skew double precision, value int8\n\ ) (\n\ sfunc = mcv_agg_with_skew_bigint_trans,\n\ stype = internal,\n\ finalfunc = space_saving_bigint_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "mcv_agg_with_skew_bigint", requires = [ mcv_agg_with_skew_bigint_trans, space_saving_bigint_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE mcv_agg(\n\ count integer, skew double precision, value text\n\ ) (\n\ sfunc = mcv_agg_with_skew_text_trans,\n\ stype = internal,\n\ finalfunc = space_saving_text_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "mcv_agg_with_skew_text", requires = [ mcv_agg_with_skew_text_trans, 
space_saving_text_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ agg SpaceSavingAggregate\n\ ) (\n\ sfunc = rollup_agg_trans,\n\ stype = internal,\n\ finalfunc = space_saving_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "freq_agg_rollup", requires = [ rollup_agg_trans, space_saving_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ agg SpaceSavingBigIntAggregate\n\ ) (\n\ sfunc = rollup_agg_bigint_trans,\n\ stype = internal,\n\ finalfunc = space_saving_bigint_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "freq_agg_bigint_rollup", requires = [ rollup_agg_bigint_trans, space_saving_bigint_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ agg SpaceSavingTextAggregate\n\ ) (\n\ sfunc = rollup_agg_text_trans,\n\ stype = internal,\n\ finalfunc = space_saving_text_final,\n\ combinefunc = space_saving_combine,\n\ serialfunc = space_saving_serialize,\n\ deserialfunc = space_saving_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "freq_agg_text_rollup", requires = [ rollup_agg_text_trans, space_saving_text_final, space_saving_combine, space_saving_serialize, space_saving_deserialize ], ); #[pg_extern(immutable, parallel_safe, name = "into_values")] pub fn freq_iter<'a>( agg: SpaceSavingAggregate<'a>, ty: AnyElement, ) -> TableIterator< 'a, ( name!(value, AnyElement), name!(min_freq, f64), name!(max_freq, f64), ), > { unsafe { if ty.oid().to_u32() != agg.type_oid { pgrx::error!("mischatched types") } let counts = agg.counts.slice().iter().zip(agg.overcounts.slice().iter()); 
TableIterator::new(agg.datums.clone().into_iter().zip(counts).map_while( move |(value, (&count, &overcount))| { let total = agg.values_seen as f64; let value = AnyElement::from_polymorphic_datum(value, false, Oid::from(agg.type_oid)) .unwrap(); let min_freq = (count - overcount) as f64 / total; let max_freq = count as f64 / total; Some((value, min_freq, max_freq)) }, )) } } #[pg_extern(immutable, parallel_safe, name = "into_values")] pub fn freq_bigint_iter<'a>( agg: SpaceSavingBigIntAggregate<'a>, ) -> TableIterator< 'a, ( name!(value, i64), name!(min_freq, f64), name!(max_freq, f64), ), > { let counts = agg.counts.slice().iter().zip(agg.overcounts.slice().iter()); TableIterator::new(agg.datums.clone().into_iter().zip(counts).map_while( move |(value, (&count, &overcount))| { let total = agg.values_seen as f64; let min_freq = (count - overcount) as f64 / total; let max_freq = count as f64 / total; Some((value, min_freq, max_freq)) }, )) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_freq_bigint_iter<'a>( agg: SpaceSavingBigIntAggregate<'a>, _accessor: AccessorIntoValues, ) -> TableIterator< 'a, ( name!(value, i64), name!(min_freq, f64), name!(max_freq, f64), ), > { freq_bigint_iter(agg) } #[pg_extern(immutable, parallel_safe, name = "into_values")] pub fn freq_text_iter<'a>( agg: SpaceSavingTextAggregate<'a>, ) -> TableIterator< 'a, ( name!(value, String), name!(min_freq, f64), name!(max_freq, f64), ), > { let counts = agg.counts.slice().iter().zip(agg.overcounts.slice().iter()); TableIterator::new(agg.datums.clone().into_iter().zip(counts).map_while( move |(value, (&count, &overcount))| { let total = agg.values_seen as f64; let data = unsafe { varlena_to_string(value.cast_mut_ptr()) }; let min_freq = (count - overcount) as f64 / total; let max_freq = count as f64 / total; Some((data, min_freq, max_freq)) }, )) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_freq_text_iter<'a>( agg: SpaceSavingTextAggregate<'a>, 
_accessor: AccessorIntoValues, ) -> TableIterator< 'a, ( name!(value, String), name!(min_freq, f64), name!(max_freq, f64), ), > { freq_text_iter(agg) } fn validate_topn_for_mcv_agg( n: i32, topn: u32, skew: f64, total_vals: u64, counts: impl Iterator, ) { if topn == 0 { // Not a mcv aggregate return; } // TODO: should we allow this if we have enough data? if n > topn as i32 { pgrx::error!( "requested N ({}) exceeds creation parameter of mcv aggregate ({})", n, topn ) } // For mcv_aggregates distributions we check that the top 'n' values satisfy the cumulative distribution // for our zeta curve. let needed_count = (zeta_le_n(skew, n as u64) * total_vals as f64).ceil() as u64; if counts.take(n as usize).sum::() < needed_count { pgrx::error!("data is not skewed enough to find top {} parameters with a skew of {}, try reducing the skew factor", n , skew) } } #[pg_extern(immutable, parallel_safe)] pub fn topn( agg: SpaceSavingAggregate<'_>, n: i32, ty: Option, ) -> SetOfIterator<'_, AnyElement> { // If called with a NULL, assume type matches if ty.is_some() && ty.unwrap().oid().to_u32() != agg.type_oid { pgrx::error!("mischatched types") } validate_topn_for_mcv_agg( n, agg.topn as u32, agg.freq_param, agg.values_seen, agg.counts.iter(), ); let min_freq = if agg.topn == 0 { agg.freq_param } else { 0. }; let type_oid: u32 = agg.type_oid; SetOfIterator::new( TopNIterator::new( agg.datums.clone().into_iter(), agg.counts.clone().into_vec(), agg.values_seen as f64, n, min_freq, ) // TODO Shouldn't failure to convert to AnyElement cause error, not early stop? 
.map_while(move |value| unsafe { AnyElement::from_polymorphic_datum(value, false, Oid::from(type_oid)) }), ) } #[pg_extern(immutable, parallel_safe, name = "topn")] pub fn default_topn( agg: SpaceSavingAggregate<'_>, ty: Option, ) -> SetOfIterator<'_, AnyElement> { if agg.topn == 0 { pgrx::error!("frequency aggregates require a N parameter to topn") } let n = agg.topn as i32; topn(agg, n, ty) } #[pg_extern(immutable, parallel_safe, name = "topn")] pub fn topn_bigint(agg: SpaceSavingBigIntAggregate<'_>, n: i32) -> SetOfIterator<'_, i64> { validate_topn_for_mcv_agg( n, agg.topn, agg.freq_param, agg.values_seen, agg.counts.iter(), ); let min_freq = if agg.topn == 0 { agg.freq_param } else { 0. }; SetOfIterator::new(TopNIterator::new( agg.datums.clone().into_iter(), agg.counts.clone().into_vec(), agg.values_seen as f64, n, min_freq, )) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_topn_bigint<'a>( agg: SpaceSavingBigIntAggregate<'a>, accessor: AccessorTopNCount, ) -> SetOfIterator<'a, i64> { topn_bigint(agg, accessor.count as i32) } #[pg_extern(immutable, parallel_safe, name = "topn")] pub fn default_topn_bigint(agg: SpaceSavingBigIntAggregate<'_>) -> SetOfIterator<'_, i64> { if agg.topn == 0 { pgrx::error!("frequency aggregates require a N parameter to topn") } let n = agg.topn as i32; topn_bigint(agg, n) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_default_topn_bigint<'a>( agg: SpaceSavingBigIntAggregate<'a>, _accessor: AccessorTopn, ) -> SetOfIterator<'a, i64> { default_topn_bigint(agg) } #[pg_extern(immutable, parallel_safe, name = "topn")] pub fn topn_text(agg: SpaceSavingTextAggregate<'_>, n: i32) -> SetOfIterator<'_, String> { validate_topn_for_mcv_agg( n, agg.topn, agg.freq_param, agg.values_seen, agg.counts.iter(), ); let min_freq = if agg.topn == 0 { agg.freq_param } else { 0. 
}; SetOfIterator::new( TopNIterator::new( agg.datums.clone().into_iter(), agg.counts.clone().into_vec(), agg.values_seen as f64, n, min_freq, ) .map(|value| unsafe { varlena_to_string(value.cast_mut_ptr()) }), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_topn_text<'a>( agg: SpaceSavingTextAggregate<'a>, accessor: AccessorTopNCount, ) -> SetOfIterator<'a, String> { topn_text(agg, accessor.count as i32) } #[pg_extern(immutable, parallel_safe, name = "topn")] pub fn default_topn_text(agg: SpaceSavingTextAggregate<'_>) -> SetOfIterator<'_, String> { if agg.topn == 0 { pgrx::error!("frequency aggregates require a N parameter to topn") } let n = agg.topn as i32; topn_text(agg, n) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_default_topn_text<'a>( agg: SpaceSavingTextAggregate<'a>, _accessor: AccessorTopn, ) -> SetOfIterator<'a, String> { default_topn_text(agg) } #[pg_extern(immutable, parallel_safe)] pub fn max_frequency(agg: SpaceSavingAggregate<'_>, value: AnyElement) -> f64 { let value: PgAnyElement = value.into(); match agg .datums .iter() .position(|datum| value == (datum, Oid::from(agg.type_oid)).into()) { Some(idx) => agg.counts.slice()[idx] as f64 / agg.values_seen as f64, None => 0., } } #[pg_extern(immutable, parallel_safe)] pub fn min_frequency(agg: SpaceSavingAggregate<'_>, value: AnyElement) -> f64 { let value: PgAnyElement = value.into(); match agg .datums .iter() .position(|datum| value == (datum, Oid::from(agg.type_oid)).into()) { Some(idx) => { (agg.counts.slice()[idx] - agg.overcounts.slice()[idx]) as f64 / agg.values_seen as f64 } None => 0., } } #[pg_extern(immutable, parallel_safe, name = "max_frequency")] pub fn max_bigint_frequency(agg: SpaceSavingBigIntAggregate<'_>, value: i64) -> f64 { match agg.datums.iter().position(|datum| value == datum) { Some(idx) => agg.counts.slice()[idx] as f64 / agg.values_seen as f64, None => 0., } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn 
arrow_max_bigint_frequency<'a>( agg: SpaceSavingBigIntAggregate<'a>, accessor: AccessorMaxFrequencyInt, ) -> f64 { max_bigint_frequency(agg, accessor.value) } #[pg_extern(immutable, parallel_safe, name = "min_frequency")] pub fn min_bigint_frequency(agg: SpaceSavingBigIntAggregate<'_>, value: i64) -> f64 { match agg.datums.iter().position(|datum| value == datum) { Some(idx) => { (agg.counts.slice()[idx] - agg.overcounts.slice()[idx]) as f64 / agg.values_seen as f64 } None => 0., } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_min_bigint_frequency<'a>( agg: SpaceSavingBigIntAggregate<'a>, accessor: AccessorMinFrequencyInt, ) -> f64 { min_bigint_frequency(agg, accessor.value) } // Still needs an arrow operator defined, but the text datum input is a bit finicky. #[pg_extern(immutable, parallel_safe, name = "max_frequency")] pub fn max_text_frequency(agg: SpaceSavingTextAggregate<'_>, value: text) -> f64 { let value: PgAnyElement = (value.0, pg_sys::TEXTOID).into(); match agg .datums .iter() .position(|datum| value == (datum, pg_sys::TEXTOID).into()) { Some(idx) => agg.counts.slice()[idx] as f64 / agg.values_seen as f64, None => 0., } } // Still needs an arrow operator defined, but the text datum input is a bit finicky. 
#[pg_extern(immutable, parallel_safe, name = "min_frequency")] pub fn min_text_frequency(agg: SpaceSavingTextAggregate<'_>, value: text) -> f64 { let value: PgAnyElement = (value.0, pg_sys::TEXTOID).into(); match agg .datums .iter() .position(|datum| value == (datum, pg_sys::TEXTOID).into()) { Some(idx) => { (agg.counts.slice()[idx] - agg.overcounts.slice()[idx]) as f64 / agg.values_seen as f64 } None => 0., } } struct TopNIterator> { datums_iter: InputIterator, counts_iter: std::vec::IntoIter, values_seen: f64, max_n: u32, min_freq: f64, i: u32, } impl> TopNIterator { fn new( datums_iter: InputIterator, counts: Vec, values_seen: f64, max_n: i32, min_freq: f64, ) -> Self { Self { datums_iter, counts_iter: counts.into_iter(), values_seen, max_n: max_n as u32, min_freq, i: 0, } } } impl> Iterator for TopNIterator { type Item = Input; fn next(&mut self) -> Option { match (self.datums_iter.next(), self.counts_iter.next()) { (Some(value), Some(count)) => { self.i += 1; if self.i > self.max_n || count as f64 / self.values_seen < self.min_freq { None } else { Some(value) } } _ => None, } } } unsafe fn varlena_to_string(vl: *const pg_sys::varlena) -> String { let bytes: &[u8] = varlena_to_byte_slice(vl); let s = std::str::from_utf8(bytes).expect("Error creating string from text data"); s.into() } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; use rand::distributions::{Distribution, Uniform}; use rand::prelude::SliceRandom; use rand::thread_rng; use rand::RngCore; use rand_distr::Zeta; #[pg_test] fn test_freq_aggregate() { Spi::connect_mut(|client| { // using the search path trick for this test to make it easier to stabilize later on let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client.update("SET TIMEZONE to UTC", 
// test_freq_aggregate (cont.): value i is inserted (100 - i) times, so lower
// values are more frequent; the asserts below pin the exact RON text of
// freq_agg(0.015) and raw_freq_agg(0.015) over the resulting 5050 rows.
None, &[]).unwrap(); client .update( "CREATE TABLE test (data INTEGER, time TIMESTAMPTZ)", None, &[], ) .unwrap(); for i in (0..100).rev() { client.update(&format!("INSERT INTO test SELECT i, '2020-1-1'::TIMESTAMPTZ + ('{} days, ' || i::TEXT || ' seconds')::INTERVAL FROM generate_series({i}, 99, 1) i", 100 - i), None, &[]).unwrap(); } let test = client.update("SELECT freq_agg(0.015, s.data)::TEXT FROM (SELECT data FROM test ORDER BY time) s", None, &[]) .unwrap().first() .get_one::().unwrap().unwrap(); let expected = "(version:1,num_values:67,topn:0,values_seen:5050,freq_param:0.015,counts:[100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67],overcounts:[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66],datums:[99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66])"; assert_eq!(test, expected); let test = client.update("SELECT raw_freq_agg(0.015, s.data)::TEXT FROM (SELECT data FROM test ORDER BY time) s", None, &[]) .unwrap().first() .get_one::().unwrap().unwrap(); let expected =
// raw_freq_agg output differs from freq_agg only in carrying the element
// type oid (23 = INT4) and rendering datums as quoted strings; the
// counts/overcounts sequences match the plain freq_agg expectation above.
"(version:1,type_oid:23,num_values:67,values_seen:5050,freq_param:0.015,topn:0,counts:[100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67,67],overcounts:[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66,66],datums:[23,\"99\",\"98\",\"97\",\"96\",\"95\",\"94\",\"93\",\"92\",\"91\",\"90\",\"89\",\"88\",\"87\",\"86\",\"85\",\"84\",\"83\",\"82\",\"81\",\"80\",\"79\",\"78\",\"77\",\"76\",\"75\",\"74\",\"73\",\"72\",\"71\",\"70\",\"69\",\"68\",\"67\",\"33\",\"34\",\"35\",\"36\",\"37\",\"38\",\"39\",\"40\",\"41\",\"42\",\"43\",\"44\",\"45\",\"46\",\"47\",\"48\",\"49\",\"50\",\"51\",\"52\",\"53\",\"54\",\"55\",\"56\",\"57\",\"58\",\"59\",\"60\",\"61\",\"62\",\"63\",\"64\",\"65\",\"66\"])"; assert_eq!(test, expected); }); } #[pg_test] fn test_topn_aggregate() { Spi::connect_mut(|client| { // using the search path trick for this test to make it easier to stabilize later on let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update( "CREATE TABLE test (data INTEGER, time TIMESTAMPTZ)", None, &[], ) .unwrap(); for i in (0..200).rev() { client.update(&format!("INSERT INTO test SELECT i, '2020-1-1'::TIMESTAMPTZ + ('{} days, ' || i::TEXT || ' seconds')::INTERVAL FROM generate_series({i}, 199, 1) i", 200 - i), None, &[]).unwrap(); } let test = client .update( "SELECT mcv_agg(10, s.data)::TEXT FROM (SELECT data FROM test ORDER BY time) s", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); let expected =
// test_topn_aggregate (cont.): expected RON for mcv_agg(10) over 20100 rows;
// the default skew parameter is recorded as freq_param:1.1, topn:10.
"(version:1,num_values:110,topn:10,values_seen:20100,freq_param:1.1,counts:[200,199,198,197,196,195,194,193,192,191,190,189,188,187,186,185,184,183,182,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181,181],overcounts:[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180,180],datums:[199,198,197,196,195,194,193,192,191,190,189,188,187,186,185,184,183,182,181,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180])"; assert_eq!(test, expected); }); } #[pg_test] fn explicit_aggregate_test() { let freq = 0.0625; let fcinfo = std::ptr::null_mut(); // dummy value, will use default collation let mut state = None.into(); for i in 11..=20 { for j in i..=20 { let value = unsafe { AnyElement::from_polymorphic_datum( pg_sys::Datum::from(j), false, pg_sys::INT4OID, ) }; state = super::freq_agg_trans(state, freq, value, fcinfo).unwrap(); } } let first = super::space_saving_serialize(state); let bytes = unsafe { std::slice::from_raw_parts( vardata_any(first.0.cast_mut_ptr()) as *const u8, varsize_any_exhdr(first.0.cast_mut_ptr()), ) }; let
// explicit_aggregate_test (cont.): byte-level expectation for the first
// serialized transient state (values 11..=20 fed with triangular counts).
// The hasher bytes in the middle vary by platform/PG version, so only a
// fixed prefix and suffix are compared below.
expected = [ 1, 1, // versions 15, 0, 0, 0, 0, 0, 0, 0, // size hint for sequence 55, 0, 0, 0, 0, 0, 0, 0, // elements seen 0, 0, 0, 0, 0, 0, 176, 63, // frequency (f64 encoding of 0.0625) 17, 0, 0, 0, // elements tracked 0, 0, 0, 0, // topn 7, 0, 0, 0, 1, 1, 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 11, 0, 0, 0, 0, 0, 0, 0, 101, 110, 95, 85, 83, 46, 85, 84, 70, 45, 56, // INT4 hasher 2, 0, 0, 0, 0, 0, 0, 0, 50, 48, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 20, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 57, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 19, count 9, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 56, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 18, count 8, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 55, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 17, count 7, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 54, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 16, count 6, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 53, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 15, count 5, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 52, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 14, count 4, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 51, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 13, count 3, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 50, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 12, count 2, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 49, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 11, count 1, overcount 0 ]; // encoding of hasher can vary on platform and across postgres version (even in length), ignore it and check the other fields let prefix_len = 8 * 4 + 2; let suffix_len = (8 + 2 + 16) * 10; assert_eq!(bytes[..prefix_len], expected[..prefix_len]); assert_eq!( bytes[bytes.len() - suffix_len..], expected[expected.len() - suffix_len..]
// Second state: values 1..=10 added in reverse (plus 11..=20 again); the
// low values 4..=7 evict earlier entries and pick up overcount 6.
); state = None.into(); for i in (1..=10).rev() { // reverse here introduces less error in the aggregate for j in i..=20 { let value = unsafe { AnyElement::from_polymorphic_datum( pg_sys::Datum::from(j), false, pg_sys::INT4OID, ) }; state = super::freq_agg_trans(state, freq, value, fcinfo).unwrap(); } } let second = super::space_saving_serialize(state); let bytes: &[u8] = unsafe { std::slice::from_raw_parts( vardata_any(second.0.cast_mut_ptr()) as *const u8, varsize_any_exhdr(second.0.cast_mut_ptr()), ) }; let expected: [u8; 513] = [ 1, 1, // versions 22, 0, 0, 0, 0, 0, 0, 0, // size hint for sequence 155, 0, 0, 0, 0, 0, 0, 0, // elements seen 0, 0, 0, 0, 0, 0, 176, 63, // frequency (f64 encoding of 0.0625) 17, 0, 0, 0, // elements tracked 0, 0, 0, 0, // topn 7, 0, 0, 0, 1, 1, 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 11, 0, 0, 0, 0, 0, 0, 0, 101, 110, 95, 85, 83, 46, 85, 84, 70, 45, 56, // INT4 hasher 2, 0, 0, 0, 0, 0, 0, 0, 49, 48, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 10, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 49, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 11, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 50, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 12, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 51, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 13, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 52, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 14, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 53, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 15, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 54, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 16, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 55, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 17, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 56, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 18, count 10, overcount 0
2, 0, 0, 0, 0, 0, 0, 0, 49, 57, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 19, count 10, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 50, 48, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 20, count 10, overcount 0 1, 0, 0, 0, 0, 0, 0, 0, 57, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 9, count 9, overcount 0 1, 0, 0, 0, 0, 0, 0, 0, 56, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 8, count 8, overcount 0 1, 0, 0, 0, 0, 0, 0, 0, 52, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 4, count 7, overcount 6 1, 0, 0, 0, 0, 0, 0, 0, 53, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 5, count 7, overcount 6 1, 0, 0, 0, 0, 0, 0, 0, 54, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 6, count 7, overcount 6 1, 0, 0, 0, 0, 0, 0, 0, 55, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 7, count 7, overcount 6 ]; // encoding of hasher can vary on platform and across postgres version (even in length), ignore it and check the other fields let suffix_len = (8 + 2 + 16) * 11 + (8 + 1 + 16) * 6; assert_eq!(bytes[..prefix_len], expected[..prefix_len]); assert_eq!( bytes[bytes.len() - suffix_len..], expected[expected.len() - suffix_len..]
// Combined state: deserialize both partial states, combine, and re-serialize;
// counts add up (e.g. "20" now 20) while overcounts carry through unchanged.
); let combined = super::space_saving_serialize( super::space_saving_combine( super::space_saving_deserialize(first, None.into()).unwrap(), super::space_saving_deserialize(second, None.into()).unwrap(), fcinfo, ) .unwrap(), ); let bytes = unsafe { std::slice::from_raw_parts( vardata_any(combined.0.cast_mut_ptr()) as *const u8, varsize_any_exhdr(combined.0.cast_mut_ptr()), ) }; let expected: [u8; 513] = [ 1, 1, // versions 22, 0, 0, 0, 0, 0, 0, 0, // size hint for sequence 210, 0, 0, 0, 0, 0, 0, 0, // elements seen 0, 0, 0, 0, 0, 0, 176, 63, // frequency (f64 encoding of 0.0625) 17, 0, 0, 0, // elements tracked 0, 0, 0, 0, // topn 7, 0, 0, 0, 1, 1, 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 11, 0, 0, 0, 0, 0, 0, 0, 101, 110, 95, 85, 83, 46, 85, 84, 70, 45, 56, // INT4 hasher 2, 0, 0, 0, 0, 0, 0, 0, 50, 48, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 20, count 20, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 57, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 19, count 19, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 56, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 18, count 18, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 55, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 17, count 17, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 54, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 16, count 16, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 53, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 15, count 15, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 52, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 14, count 14, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 51, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 13, count 13, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 50, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 12, count 12, overcount 0 2, 0, 0, 0, 0, 0, 0, 0, 49, 49, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 11, count 11, overcount 0 2, 0, 0, 0, 0,
0, 0, 0, 49, 48, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 10, count 10, overcount 0 1, 0, 0, 0, 0, 0, 0, 0, 57, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 9, count 9, overcount 0 1, 0, 0, 0, 0, 0, 0, 0, 56, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // string 8, count 8, overcount 0 1, 0, 0, 0, 0, 0, 0, 0, 52, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 4, count 7, overcount 6 1, 0, 0, 0, 0, 0, 0, 0, 54, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 6, count 7, overcount 6 1, 0, 0, 0, 0, 0, 0, 0, 53, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 5, count 7, overcount 6 1, 0, 0, 0, 0, 0, 0, 0, 55, 7, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, // string 7, count 7, overcount 6 ]; // encoding of hasher can vary on platform and across postgres version (even in length), ignore it and check the other fields let suffix_len = (8 + 2 + 16) * 11 + (8 + 1 + 16) * 6; assert_eq!(bytes[..prefix_len], expected[..prefix_len]); assert_eq!( bytes[bytes.len() - suffix_len..], expected[expected.len() - suffix_len..]
// End of explicit_aggregate_test. setup_with_test_table() below creates a
// 20-value skewed table plus six pre-built aggregates (three mcv_agg
// variants, three freq_agg thresholds) in table 'aggs'; test_topn exercises
// the topn()/agg->topn() accessors against those rows.
); } // Setup environment and create table 'test' with some aggregates in table 'aggs' fn setup_with_test_table(client: &mut pgrx::spi::SpiClient) { // using the search path trick for this test to make it easier to stabilize later on let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update( "CREATE TABLE test (data INTEGER, time TIMESTAMPTZ)", None, &[], ) .unwrap(); for i in (0..20).rev() { client.update(&format!("INSERT INTO test SELECT i, '2020-1-1'::TIMESTAMPTZ + ('{} days, ' || i::TEXT || ' seconds')::INTERVAL FROM generate_series({i}, 19, 1) i", 10 - i), None, &[]).unwrap(); } client .update( "CREATE TABLE aggs (name TEXT, agg SPACESAVINGBIGINTAGGREGATE)", None, &[], ) .unwrap(); client.update("INSERT INTO aggs SELECT 'mcv_default', mcv_agg(5, s.data) FROM (SELECT data FROM test ORDER BY time) s", None, &[]).unwrap(); client.update("INSERT INTO aggs SELECT 'mcv_1.5', mcv_agg(5, 1.5, s.data) FROM (SELECT data FROM test ORDER BY time) s", None, &[]).unwrap(); client.update("INSERT INTO aggs SELECT 'mcv_2', mcv_agg(5, 2, s.data) FROM (SELECT data FROM test ORDER BY time) s", None, &[]).unwrap(); client.update("INSERT INTO aggs SELECT 'freq_8', freq_agg(0.08, s.data) FROM (SELECT data FROM test ORDER BY time) s", None, &[]).unwrap(); client.update("INSERT INTO aggs SELECT 'freq_5', freq_agg(0.05, s.data) FROM (SELECT data FROM test ORDER BY time) s", None, &[]).unwrap(); client.update("INSERT INTO aggs SELECT 'freq_2', freq_agg(0.02, s.data) FROM (SELECT data FROM test ORDER BY time) s", None, &[]).unwrap(); } // API tests #[pg_test] fn test_topn() { Spi::connect_mut(|client| { setup_with_test_table(client); // simple tests let rows = client .update( "SELECT topn(agg) FROM aggs WHERE name =
'mcv_default'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 5); let rows = client .update( "SELECT agg -> topn() FROM aggs WHERE name = 'mcv_default'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 5); let rows = client .update( "SELECT topn(agg, 5) FROM aggs WHERE name = 'freq_5'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 5); // can limit below topn_agg value let rows = client .update( "SELECT topn(agg, 3) FROM aggs WHERE name = 'mcv_default'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 3); let rows = client .update( "SELECT agg -> topn(3) FROM aggs WHERE name = 'mcv_default'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 3); // only 4 rows with freq >= 0.08 let rows = client .update( "SELECT topn(agg, 5) FROM aggs WHERE name = 'freq_8'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 4); }); } #[pg_test( error = "data is not skewed enough to find top 0 parameters with a skew of 1.5, try reducing the skew factor" )] fn topn_on_underskewed_mcv_agg() { Spi::connect_mut(|client| { setup_with_test_table(client); client .update( "SELECT topn(agg, 0::int) FROM aggs WHERE name = 'mcv_1.5'", None, &[], ) .unwrap() .count(); }); } #[pg_test(error = "requested N (8) exceeds creation parameter of mcv aggregate (5)")] fn topn_high_n_on_mcv_agg() { Spi::connect_mut(|client| { setup_with_test_table(client); client .update( "SELECT topn(agg, 8) FROM aggs WHERE name = 'mcv_default'", None, &[], ) .unwrap() .count(); }); } #[pg_test(error = "frequency aggregates require a N parameter to topn")] fn topn_requires_n_for_freq_agg() { Spi::connect_mut(|client| { setup_with_test_table(client); assert_eq!( 0, client .update( "SELECT topn(agg) FROM aggs WHERE name = 'freq_2'", None, &[] ) .unwrap() .count(), ); }); } #[pg_test] fn test_into_values() { Spi::connect_mut(|client| { setup_with_test_table(client); let rows = client .update( "SELECT into_values(agg) FROM aggs WHERE name = 'freq_8'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 13); let
// test_into_values (cont.): row counts for each aggregate, via both the
// into_values(agg) call form and the agg -> into_values() arrow form;
// test_frequency_getters then checks min_frequency/max_frequency for
// tracked, missing, and noisy (overcounted) values.
rows = client .update( "SELECT into_values(agg) FROM aggs WHERE name = 'freq_5'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 20); let rows = client .update( "SELECT into_values(agg) FROM aggs WHERE name = 'freq_2'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 20); let rows = client .update( "SELECT agg -> into_values() FROM aggs WHERE name = 'freq_8'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 13); let rows = client .update( "SELECT agg -> into_values() FROM aggs WHERE name = 'freq_5'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 20); let rows = client .update( "SELECT agg -> into_values() FROM aggs WHERE name = 'freq_2'", None, &[], ) .unwrap() .count(); assert_eq!(rows, 20); }); } #[pg_test] fn test_frequency_getters() { Spi::connect_mut(|client| { setup_with_test_table(client); // simple tests let (min, max) = client.update("SELECT min_frequency(agg, 3), max_frequency(agg, 3) FROM aggs WHERE name = 'freq_2'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.01904761904761905); assert_eq!(max.unwrap(), 0.01904761904761905); let (min, max) = client.update("SELECT min_frequency(agg, 11), max_frequency(agg, 11) FROM aggs WHERE name = 'mcv_default'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.05714285714285714); assert_eq!(max.unwrap(), 0.05714285714285714); let (min, max) = client.update("SELECT agg -> min_frequency(3), agg -> max_frequency(3) FROM aggs WHERE name = 'freq_2'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.01904761904761905); assert_eq!(max.unwrap(), 0.01904761904761905); let (min, max) = client.update("SELECT agg -> min_frequency(11), agg -> max_frequency(11) FROM aggs WHERE name = 'mcv_default'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.05714285714285714); assert_eq!(max.unwrap(), 0.05714285714285714); // missing value let (min, max) = client.update("SELECT min_frequency(agg, 3),
max_frequency(agg, 3) FROM aggs WHERE name = 'freq_8'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.); assert_eq!(max.unwrap(), 0.); let (min, max) = client.update("SELECT min_frequency(agg, 20), max_frequency(agg, 20) FROM aggs WHERE name = 'mcv_2'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.); assert_eq!(max.unwrap(), 0.); // noisy value let (min, max) = client.update("SELECT min_frequency(agg, 8), max_frequency(agg, 8) FROM aggs WHERE name = 'mcv_1.5'", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(min.unwrap(), 0.004761904761904762); assert_eq!(max.unwrap(), 0.05238095238095238); }); } #[pg_test] fn test_rollups() { Spi::connect_mut(|client| { client.update( "CREATE TABLE test (raw_data DOUBLE PRECISION, int_data INTEGER, text_data TEXT, bucket INTEGER)", None, &[] ).unwrap(); // Generate an array of 10000 values by taking the probability curve for a // zeta curve with an s of 1.1 for the top 5 values, then adding smaller // amounts of the next 5 most common values, and finally filling with unique values. let mut vals = vec![1; 945]; vals.append(&mut vec![2; 441]); vals.append(&mut vec![3; 283]); vals.append(&mut vec![4; 206]); vals.append(&mut vec![5; 161]); for v in 6..=10 { vals.append(&mut vec![v, 125]); } for v in 0..(10000 - 945 - 441 - 283 - 206 - 161 - (5 * 125)) { vals.push(11 + v); } vals.shuffle(&mut thread_rng()); // Probably not the most efficient way of populating this table...
// NOTE(review): `vec![v, 125]` above builds a TWO-element vec [v, 125], not
// 125 copies of v — the filler arithmetic below assumes 125 copies, so this
// looks like it should be `vec![v; 125]`; verify against intended counts.
for v in vals { let cmd = format!( "INSERT INTO test SELECT {v}, {v}::INT, {v}::TEXT, FLOOR(RANDOM() * 10)" ); client.update(&cmd, None, &[]).unwrap(); } // No matter how the values are batched into subaggregates, we should always // see the same top 5 values let mut result = client.update( "WITH aggs AS (SELECT bucket, raw_mcv_agg(5, raw_data) as raw_agg FROM test GROUP BY bucket) SELECT topn(rollup(raw_agg), NULL::DOUBLE PRECISION)::TEXT from aggs", None, &[] ).unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("1")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("2")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("3")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("4")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("5")); assert!(result.next().is_none()); let mut result = client.update( "WITH aggs AS (SELECT bucket, mcv_agg(5, int_data) as int_agg FROM test GROUP BY bucket) SELECT topn(rollup(int_agg))::TEXT from aggs", None, &[] ).unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("1")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("2")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("3")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("4")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("5")); assert!(result.next().is_none()); let mut result = client.update( "WITH aggs AS (SELECT bucket, mcv_agg(5, text_data) as text_agg FROM test GROUP BY bucket) SELECT topn(rollup(text_agg))::TEXT from aggs", None, &[] ).unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("1")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("2")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("3")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("4")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("5")); assert!(result.next().is_none()); }); } #[pg_test] fn
// test_freq_agg_invariant: with 200 samples and freq = 0.015, any value seen
// at least 3 times (>= 0.015 * 200) must appear in the finalized aggregate.
// test_freq_agg_rollup_maintains_invariant checks the same property survives
// rolling up four 50-sample partial aggregates via rollup_agg_trans.
test_freq_agg_invariant() { // The frequency agg invariant is that any element with frequency >= f will appear in the freq_agg(f) // This test will randomly generate 200 values in the uniform range [0, 99] and check to see any value // that shows up at least 3 times appears in a frequency aggregate created with freq = 0.015 let rand100 = Uniform::new_inclusive(0, 99); let mut rng = rand::thread_rng(); let mut counts = [0; 100]; let mut state = None.into(); let freq = 0.015; let fcinfo = std::ptr::null_mut(); // dummy value, will use default collation for _ in 0..200 { let v = rand100.sample(&mut rng); let value = unsafe { AnyElement::from_polymorphic_datum(pg_sys::Datum::from(v), false, pg_sys::INT4OID) }; state = super::freq_agg_trans(state, freq, value, fcinfo).unwrap(); counts[v] += 1; } let state = space_saving_final(state, fcinfo).unwrap(); let vals: std::collections::HashSet = state.datums.iter().map(|datum| datum.value()).collect(); for (val, &count) in counts.iter().enumerate() { if count >= 3 { assert!(vals.contains(&val)); } } } #[pg_test] fn test_freq_agg_rollup_maintains_invariant() { // The frequency agg invariant is that any element with frequency >= f will appear in the freq_agg(f) // This test will randomly generate 200 values in the uniform range [0, 99] and check to see any value // that shows up at least 3 times appears in a frequency aggregate created with freq = 0.015 let rand100 = Uniform::new_inclusive(0, 99); let mut rng = rand::thread_rng(); let mut counts = [0; 100]; let freq = 0.015; let fcinfo = std::ptr::null_mut(); // dummy value, will use default collation let mut aggs = vec![]; for _ in 0..4 { let mut state = None.into(); for _ in 0..50 { let v = rand100.sample(&mut rng); let value = unsafe { AnyElement::from_polymorphic_datum( pg_sys::Datum::from(v), false, pg_sys::INT4OID, ) }; state = super::freq_agg_trans(state, freq, value, fcinfo).unwrap(); counts[v] += 1; } aggs.push(space_saving_final(state, fcinfo).unwrap()); } let state = {
// Roll the four finalized partial aggregates into one and finalize again.
let mut state = None.into(); for agg in aggs { state = super::rollup_agg_trans(state, Some(agg), fcinfo).unwrap(); } space_saving_final(state, fcinfo).unwrap() }; let vals: std::collections::HashSet = state.datums.iter().map(|datum| datum.value()).collect(); for (val, &count) in counts.iter().enumerate() { if count >= 3 { assert!(vals.contains(&val)); } } } #[pg_test] fn test_mcv_agg_invariant() { // The ton agg invariant is that we'll be able to track the top n values for any data // with a distribution at least as skewed as a zeta distribution // To test this we will generate a mcv aggregate with a random skew (1.01 - 2.0) and // n (5-10). We then generate a random sample with skew 5% greater than our aggregate // (this should be enough to keep the sample above the target even with bad luck), and // verify that we correctly identify the top n values. let mut rng = rand::thread_rng(); let n = rng.next_u64() % 6 + 5; let skew = (rng.next_u64() % 100) as f64 / 100. + 1.01; let zeta = Zeta::new(skew * 1.05).unwrap(); let mut counts = [0; 100]; let mut state = None.into(); let fcinfo = std::ptr::null_mut(); // dummy value, will use default collation for _ in 0..100000 { let v = zeta.sample(&mut rng).floor() as usize; if v == usize::MAX { continue; // These tail values can start to add up at low skew values } let value = unsafe { AnyElement::from_polymorphic_datum(pg_sys::Datum::from(v), false, pg_sys::INT4OID) }; state = super::mcv_agg_with_skew_trans(state, n as i32, skew, value, fcinfo).unwrap(); if v < 100 { // anything greater than 100 will not be in the top values counts[v] += 1; } } let state = space_saving_final(state, fcinfo).unwrap(); let value = unsafe { AnyElement::from_polymorphic_datum(Datum::from(0), false, pg_sys::INT4OID) }; let t: Vec = default_topn(state, Some(value.unwrap())).collect(); let agg_topn: Vec = t.iter().map(|x| x.datum().value()).collect(); let mut temp: Vec<(usize, &usize)> = counts.iter().enumerate().collect(); temp.sort_by(|(_, cnt1),
(_, cnt2)| cnt2.cmp(cnt1)); // descending order by count let top_vals: Vec = temp.into_iter().map(|(val, _)| val).collect(); for i in 0..n as usize { assert_eq!(agg_topn[i], top_vals[i]); } } } ================================================ FILE: extension/src/gauge_agg.rs ================================================ use pgrx::*; use serde::{Deserialize, Serialize}; use counter_agg::{range::I64Range, GaugeSummaryBuilder, MetricSummary}; use flat_serialize_macro::FlatSerializable; use stats_agg::stats2d::StatsSummary2D; use tspoint::TSPoint; use crate::{ accessors::{ AccessorCorr, AccessorCounterZeroTime, AccessorDelta, AccessorExtrapolatedDelta, AccessorExtrapolatedRate, AccessorIdeltaLeft, AccessorIdeltaRight, AccessorIntercept, AccessorIrateLeft, AccessorIrateRight, AccessorNumChanges, AccessorNumElements, AccessorRate, AccessorSlope, AccessorTimeDelta, AccessorWithBounds, }, aggregate_utils::in_aggregate_context, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, range::{get_range, I64RangeWrapper}, raw::{bytea, tstzrange}, ron_inout_funcs, }; // TODO move to share with counter_agg #[derive(Clone, Debug, PartialEq, Deserialize, Serialize, FlatSerializable)] #[repr(C)] pub struct FlatSummary { stats: StatsSummary2D, first: TSPoint, second: TSPoint, penultimate: TSPoint, last: TSPoint, reset_sum: f64, num_resets: u64, num_changes: u64, bounds: I64RangeWrapper, } #[pg_schema] mod toolkit_experimental { use super::*; pg_type!
// GaugeSummary: the stored form of a gauge aggregate (a FlatSummary behind
// pg_type! plumbing). interpolate() extends a bucket's summary to the full
// interval [interval_start, interval_start + interval_len) by linearly
// interpolating boundary points from the previous/next buckets' summaries
// (time_weighted_average::TimeWeightMethod::Linear).
{ #[derive(Debug, PartialEq)] struct GaugeSummary { #[flat_serialize::flatten] summary: FlatSummary, } } impl GaugeSummary { pub(super) fn interpolate( &self, interval_start: i64, interval_len: i64, prev: Option, next: Option, ) -> GaugeSummary { let this = MetricSummary::from(self.clone()); let prev = prev.map(MetricSummary::from); let next = next.map(MetricSummary::from); let prev = if this.first.ts > interval_start { prev.map(|summary| { time_weighted_average::TimeWeightMethod::Linear .interpolate(summary.last, Some(this.first), interval_start) .expect("unable to interpolate lower bound") }) } else { None }; let next = next.map(|summary| { time_weighted_average::TimeWeightMethod::Linear .interpolate( this.last, Some(summary.first), interval_start + interval_len, ) .expect("unable to interpolate upper bound") }); let builder = prev.map(|pt| GaugeSummaryBuilder::new(&pt, None)); let mut builder = builder.map_or_else( || { let mut summary = this.clone(); summary.bounds = None; summary.into() }, |mut builder| { builder .combine(&this) .expect("unable to add data to interpolation"); builder }, ); if let Some(next) = next { builder .add_point(&next) .expect("unable to add final interpolated point"); } builder.build().into() } } ron_inout_funcs!(GaugeSummary); } use toolkit_experimental::*; // TODO reunify with crate::counter_agg::CounterSummaryTransSate // TODO move to crate::metrics::TransState (taking FnOnce()->MetricSummaryBuilder to support both) #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] struct GaugeSummaryTransState { #[serde(skip)] point_buffer: Vec, #[serde(skip)] bounds: Option, // stores bounds until we combine points, after which, the bounds are stored in each summary // We have a summary buffer here in order to deal with the fact that when the cmobine function gets called it // must first build up a buffer of InternalMetricSummaries, then sort them, then call the combine function in // the correct order.
// Aggregate transient state: raw points accumulate in point_buffer (not
// serialized) until combine_points() sorts them by timestamp and folds them
// into a MetricSummary, which is appended to summary_buffer below.
summary_buffer: Vec, } impl GaugeSummaryTransState { fn new() -> Self { Self { point_buffer: vec![], bounds: None, summary_buffer: vec![], } } fn push_point(&mut self, value: TSPoint) { self.point_buffer.push(value); } fn combine_points(&mut self) { if self.point_buffer.is_empty() { return; } self.point_buffer.sort_unstable_by_key(|p| p.ts); let mut iter = self.point_buffer.iter(); let mut summary = GaugeSummaryBuilder::new(iter.next().unwrap(), self.bounds); for p in iter { summary .add_point(p) .unwrap_or_else(|e| pgrx::error!("{}", e)); } self.point_buffer.clear(); // TODO build method should check validity // check bounds only after we've combined all the points, so we aren't doing it all the time. if !summary.bounds_valid() { panic!("Metric bounds invalid") } self.summary_buffer.push(summary.build()); } fn push_summary(&mut self, other: &Self) { let sum_iter = other.summary_buffer.iter(); for sum in sum_iter { self.summary_buffer.push(sum.clone()); } } fn combine_summaries(&mut self) { self.combine_points(); if self.summary_buffer.len() <= 1 { return; } self.summary_buffer.sort_unstable_by_key(|s| s.first.ts); let mut sum_iter = self.summary_buffer.drain(..); let first = sum_iter.next().expect("already handled empty case"); let mut new_summary = GaugeSummaryBuilder::from(first); for sum in sum_iter { new_summary .combine(&sum) .unwrap_or_else(|e| pgrx::error!("{}", e)); } self.summary_buffer.push(new_summary.build()); } } #[pg_extern(immutable, parallel_safe, strict, schema = "toolkit_experimental")] fn gauge_summary_trans_serialize(state: Internal) -> bytea { let mut state = state; let state: &mut GaugeSummaryTransState = unsafe { state.get_mut().unwrap() }; state.combine_summaries(); crate::do_serialize!(state) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn gauge_summary_trans_deserialize(bytes: bytea, _internal: Internal) -> Option { gauge_summary_trans_deserialize_inner(bytes).internal() } fn
gauge_summary_trans_deserialize_inner(bytes: bytea) -> Inner { let c: GaugeSummaryTransState = crate::do_deserialize!(bytes, GaugeSummaryTransState); c.into() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] fn gauge_agg_trans( state: Internal, ts: Option, val: Option, bounds: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { gauge_agg_trans_inner(unsafe { state.to_inner() }, ts, val, bounds, fcinfo).internal() } fn gauge_agg_trans_inner( state: Option>, ts: Option, val: Option, bounds: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let p = match (ts, val) { (_, None) => return state, (None, _) => return state, (Some(ts), Some(val)) => TSPoint { ts: ts.into(), val }, }; match state { None => { let mut s = GaugeSummaryTransState::new(); if let Some(r) = bounds { s.bounds = get_range(r.0.cast_mut_ptr()); } s.push_point(p); Some(s.into()) } Some(mut s) => { s.push_point(p); Some(s) } } }) } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] fn gauge_agg_trans_no_bounds( state: Internal, ts: Option, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { gauge_agg_trans_inner(unsafe { state.to_inner() }, ts, val, None, fcinfo).internal() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] fn gauge_agg_summary_trans( state: Internal, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { gauge_agg_summary_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal() } fn gauge_agg_summary_trans_inner( state: Option>, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state, value) { (state, None) => state, (None, Some(value)) => { let mut state = GaugeSummaryTransState::new(); state.summary_buffer.push(value.into()); Some(state.into()) } (Some(mut state), Some(value)) => { state.summary_buffer.push(value.into()); Some(state) } }) } } #[pg_extern(immutable, 
parallel_safe, schema = "toolkit_experimental")]
fn gauge_agg_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { gauge_agg_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

// NOTE(review): generic parameters (`Option<Inner<...>>`, `Option<GaugeSummary>`)
// were stripped in this copy and restored from usage — confirm against upstream.
fn gauge_agg_combine_inner(
    state1: Option<Inner<GaugeSummaryTransState>>,
    state2: Option<Inner<GaugeSummaryTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<GaugeSummaryTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            match (state1, state2) {
                (None, None) => None,
                (None, Some(state2)) => {
                    let mut s = state2.clone();
                    s.combine_points();
                    Some(s.into())
                }
                (Some(state1), None) => {
                    let mut s = state1.clone();
                    s.combine_points();
                    Some(s.into())
                }
                //should I make these return themselves?
                (Some(state1), Some(state2)) => {
                    let mut s1 = state1.clone(); // is there a way to avoid if it doesn't need it?
                    s1.combine_points();
                    let mut s2 = state2.clone();
                    s2.combine_points();
                    s2.push_summary(&s1);
                    Some(s2.into())
                }
            }
        })
    }
}

/// Final function: merge everything buffered and emit at most one summary.
#[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")]
fn gauge_agg_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<GaugeSummary<'static>> {
    gauge_agg_final_inner(unsafe { state.to_inner() }, fcinfo)
}

fn gauge_agg_final_inner(
    state: Option<Inner<GaugeSummaryTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<GaugeSummary<'static>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let mut state = match state {
                None => return None,
                Some(state) => state.clone(),
            };
            state.combine_summaries();
            debug_assert!(state.summary_buffer.len() <= 1);
            match state.summary_buffer.pop() {
                None => None,
                Some(st) => {
                    // there are some edge cases that this should prevent, but I'm not sure it's
                    // necessary, we do check the bounds in the functions that use them.
                    if !st.bounds_valid() {
                        panic!("Metric bounds invalid")
                    }
                    Some(GaugeSummary::from(st))
                }
            }
        })
    }
}

extension_sql!(
    "\n\
    CREATE AGGREGATE toolkit_experimental.gauge_agg( ts timestamptz, value DOUBLE PRECISION, bounds tstzrange )\n\
    (\n\
        sfunc = toolkit_experimental.gauge_agg_trans,\n\
        stype = internal,\n\
        finalfunc = toolkit_experimental.gauge_agg_final,\n\
        combinefunc = toolkit_experimental.gauge_agg_combine,\n\
        serialfunc = toolkit_experimental.gauge_summary_trans_serialize,\n\
        deserialfunc = toolkit_experimental.gauge_summary_trans_deserialize,\n\
        parallel = restricted\n\
    );\n",
    name = "gauge_agg",
    requires = [
        gauge_agg_trans,
        gauge_agg_final,
        gauge_agg_combine,
        gauge_summary_trans_serialize,
        gauge_summary_trans_deserialize
    ],
);

// allow calling gauge agg without bounds provided.
extension_sql!(
    "\n\
    CREATE AGGREGATE toolkit_experimental.gauge_agg( ts timestamptz, value DOUBLE PRECISION )\n\
    (\n\
        sfunc = toolkit_experimental.gauge_agg_trans_no_bounds,\n\
        stype = internal,\n\
        finalfunc = toolkit_experimental.gauge_agg_final,\n\
        combinefunc = toolkit_experimental.gauge_agg_combine,\n\
        serialfunc = toolkit_experimental.gauge_summary_trans_serialize,\n\
        deserialfunc = toolkit_experimental.gauge_summary_trans_deserialize,\n\
        parallel = restricted\n\
    );\n\
    ",
    name = "gauge_agg2",
    requires = [
        gauge_agg_trans_no_bounds,
        gauge_agg_final,
        gauge_agg_combine,
        gauge_summary_trans_serialize,
        gauge_summary_trans_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE toolkit_experimental.rollup(gs toolkit_experimental.GaugeSummary)\n\
    (\n\
        sfunc = toolkit_experimental.gauge_agg_summary_trans,\n\
        stype = internal,\n\
        finalfunc = toolkit_experimental.gauge_agg_final,\n\
        combinefunc = toolkit_experimental.gauge_agg_combine,\n\
        serialfunc = toolkit_experimental.gauge_summary_trans_serialize,\n\
        deserialfunc = toolkit_experimental.gauge_summary_trans_deserialize,\n\
        parallel = restricted\n\
    );\n\
    ",
    name = "gauge_rollup",
    requires = [
        gauge_agg_summary_trans,
        gauge_agg_final,
gauge_agg_combine, gauge_summary_trans_serialize, gauge_summary_trans_deserialize ], ); // TODO Reconsider using the same pg_type for counter and gauge aggregates to avoid duplicating all these functions. #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_delta(sketch: GaugeSummary, _accessor: AccessorDelta) -> f64 { delta(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn delta(summary: GaugeSummary) -> f64 { MetricSummary::from(summary).delta() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_gauge_agg_rate(sketch: GaugeSummary, _accessor: AccessorRate) -> Option { rate(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn rate(summary: GaugeSummary) -> Option { MetricSummary::from(summary).rate() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_time_delta(sketch: GaugeSummary, _accessor: AccessorTimeDelta) -> f64 { time_delta(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn time_delta(summary: GaugeSummary) -> f64 { MetricSummary::from(summary).time_delta() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_irate_left(sketch: GaugeSummary, _accessor: AccessorIrateLeft) -> Option { irate_left(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn irate_left(summary: GaugeSummary) -> Option { MetricSummary::from(summary).irate_left() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_irate_right(sketch: GaugeSummary, _accessor: AccessorIrateRight) -> Option { irate_right(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn irate_right(summary: GaugeSummary) -> Option { MetricSummary::from(summary).irate_right() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_idelta_left(sketch: GaugeSummary, _accessor: AccessorIdeltaLeft) -> f64 { idelta_left(sketch) } #[pg_extern(strict, 
immutable, parallel_safe, schema = "toolkit_experimental")] fn idelta_left(summary: GaugeSummary) -> f64 { MetricSummary::from(summary).idelta_left() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_idelta_right(sketch: GaugeSummary, _accessor: AccessorIdeltaRight) -> f64 { idelta_right(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn idelta_right(summary: GaugeSummary) -> f64 { MetricSummary::from(summary).idelta_right() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_with_bounds(sketch: GaugeSummary, accessor: AccessorWithBounds) -> GaugeSummary { let mut builder = GaugeSummaryBuilder::from(MetricSummary::from(sketch)); builder.set_bounds(accessor.bounds()); builder.build().into() } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn with_bounds(summary: GaugeSummary, bounds: tstzrange) -> GaugeSummary { // TODO dedup with previous by using apply_bounds unsafe { let ptr = bounds.0.cast_mut_ptr(); let mut builder = GaugeSummaryBuilder::from(MetricSummary::from(summary)); builder.set_bounds(get_range(ptr)); builder.build().into() } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn arrow_extrapolated_delta( sketch: GaugeSummary, _accessor: AccessorExtrapolatedDelta, ) -> Option { extrapolated_delta(sketch) } #[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")] fn extrapolated_delta(summary: GaugeSummary) -> Option { MetricSummary::from(summary).prometheus_delta().unwrap() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] fn interpolated_delta( summary: GaugeSummary, start: crate::raw::TimestampTz, interval: crate::raw::Interval, prev: Option, next: Option, ) -> f64 { let interval = crate::datum_utils::interval_to_ms(&start, &interval); MetricSummary::from(summary.interpolate(start.into(), interval, prev, next)).delta() } #[pg_operator(immutable, parallel_safe)] #[opname(->)] fn 
arrow_extrapolated_rate(
    sketch: GaugeSummary,
    _accessor: AccessorExtrapolatedRate,
) -> Option<f64> {
    extrapolated_rate(sketch)
}

/// Prometheus-style extrapolated rate; requires bounds to be set.
#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn extrapolated_rate(summary: GaugeSummary) -> Option<f64> {
    MetricSummary::from(summary).prometheus_rate().unwrap()
}

/// Rate over one bucket after interpolating edge values from neighbors.
#[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")]
fn interpolated_rate(
    summary: GaugeSummary,
    start: crate::raw::TimestampTz,
    interval: crate::raw::Interval,
    prev: Option<GaugeSummary>,
    next: Option<GaugeSummary>,
) -> Option<f64> {
    let interval = crate::datum_utils::interval_to_ms(&start, &interval);
    MetricSummary::from(summary.interpolate(start.into(), interval, prev, next)).rate()
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
fn arrow_num_elements(sketch: GaugeSummary, _accessor: AccessorNumElements) -> i64 {
    num_elements(sketch)
}

#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn num_elements(summary: GaugeSummary) -> i64 {
    MetricSummary::from(summary).stats.n as i64
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
fn arrow_num_changes(sketch: GaugeSummary, _accessor: AccessorNumChanges) -> i64 {
    num_changes(sketch)
}

#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn num_changes(summary: GaugeSummary) -> i64 {
    MetricSummary::from(summary).num_changes as i64
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
fn arrow_slope(sketch: GaugeSummary, _accessor: AccessorSlope) -> Option<f64> {
    slope(sketch)
}

/// Least-squares slope of value over time.
#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn slope(summary: GaugeSummary) -> Option<f64> {
    MetricSummary::from(summary).stats.slope()
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
fn arrow_intercept(sketch: GaugeSummary, _accessor: AccessorIntercept) -> Option<f64> {
    intercept(sketch)
}

/// Least-squares intercept of value over time.
#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn intercept(summary: GaugeSummary) -> Option<f64> {
    MetricSummary::from(summary).stats.intercept()
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
fn arrow_corr(sketch: GaugeSummary, _accessor: AccessorCorr) -> Option<f64> {
    corr(sketch)
}

/// Correlation coefficient of the value-vs-time fit.
#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn corr(summary: GaugeSummary) -> Option<f64> {
    MetricSummary::from(summary).stats.corr()
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
fn arrow_zero_time(
    sketch: GaugeSummary,
    __accessor: AccessorCounterZeroTime,
) -> Option<crate::raw::TimestampTz> {
    gauge_zero_time(sketch)
}

/// Time at which the least-squares fit of the gauge crosses zero.
#[pg_extern(strict, immutable, parallel_safe, schema = "toolkit_experimental")]
fn gauge_zero_time(summary: GaugeSummary) -> Option<crate::raw::TimestampTz> {
    // x_intercept is in seconds; timestamps are microseconds, hence * 1_000_000
    Some(((MetricSummary::from(summary).stats.x_intercept()? * 1_000_000.0) as i64).into())
}

// NOTE(review): the `From` source/target generic arguments were stripped in
// this copy; restored from the conversion bodies — confirm against upstream.
impl From<GaugeSummary<'_>> for MetricSummary {
    fn from(pg: GaugeSummary) -> Self {
        Self {
            first: pg.summary.first,
            second: pg.summary.second,
            penultimate: pg.summary.penultimate,
            last: pg.summary.last,
            reset_sum: pg.summary.reset_sum,
            num_resets: pg.summary.num_resets,
            num_changes: pg.summary.num_changes,
            stats: pg.summary.stats,
            bounds: pg.summary.bounds.to_i64range(),
        }
    }
}

impl From<MetricSummary> for GaugeSummary<'_> {
    fn from(internal: MetricSummary) -> Self {
        unsafe {
            flatten!(GaugeSummary {
                summary: FlatSummary {
                    stats: internal.stats,
                    first: internal.first,
                    second: internal.second,
                    penultimate: internal.penultimate,
                    last: internal.last,
                    reset_sum: internal.reset_sum,
                    num_resets: internal.num_resets,
                    num_changes: internal.num_changes,
                    bounds: I64RangeWrapper::from_i64range(internal.bounds)
                }
            })
        }
    }
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx_macros::pg_test;

    use crate::counter_agg::testing::*;

    use super::*;

    macro_rules!
select_one { ($client:expr, $stmt:expr, $type:ty) => { $client .update($stmt, None, &[]) .unwrap() .first() .get_one::<$type>() .unwrap() .unwrap() }; } #[pg_test] fn round_trip() { Spi::connect_mut(|client| { client .update( "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)", None, &[], ) .unwrap(); client.update("SET TIME ZONE 'UTC'", None, &[]).unwrap(); let stmt = "INSERT INTO test VALUES\ ('2020-01-01 00:00:00+00', 10.0),\ ('2020-01-01 00:01:00+00', 20.0),\ ('2020-01-01 00:02:00+00', 30.0),\ ('2020-01-01 00:03:00+00', 20.0),\ ('2020-01-01 00:04:00+00', 10.0),\ ('2020-01-01 00:05:00+00', 20.0),\ ('2020-01-01 00:06:00+00', 10.0),\ ('2020-01-01 00:07:00+00', 30.0),\ ('2020-01-01 00:08:00+00', 10.0)"; client.update(stmt, None, &[]).unwrap(); let expected = "(\ version:1,\ summary:(\ stats:(\ n:9,\ sx:5680370160,\ sx2:216000,\ sx3:0,\ sx4:9175680000,\ sy:160,\ sy2:555.5555555555555,\ sy3:1802.4691358024695,\ sy4:59341.563786008235,\ sxy:-600\ ),\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ second:(ts:\"2020-01-01 00:01:00+00\",val:20),\ penultimate:(ts:\"2020-01-01 00:07:00+00\",val:30),\ last:(ts:\"2020-01-01 00:08:00+00\",val:10),\ reset_sum:0,\ num_resets:0,\ num_changes:8,\ bounds:(\ is_present:0,\ has_left:0,\ has_right:0,\ padding:(0,0,0,0,0),\ left:None,\ right:None\ )\ )\ )"; assert_eq!( expected, select_one!( client, "SELECT toolkit_experimental.gauge_agg(ts, val)::TEXT FROM test", String ) ); assert_eq!( expected, select_one!( client, &format!("SELECT '{expected}'::toolkit_experimental.GaugeSummary::TEXT"), String ) ); }); } #[pg_test] fn delta_after_gauge_decrease() { Spi::connect_mut(|client| { decrease(client); let stmt = "SELECT toolkit_experimental.delta(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(-20.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn delta_after_gauge_increase() { Spi::connect_mut(|client| { increase(client); let stmt = "SELECT toolkit_experimental.delta(toolkit_experimental.gauge_agg(ts, val)) 
FROM test"; assert_eq!(20.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn delta_after_gauge_decrease_then_increase_to_same_value() { Spi::connect_mut(|client| { decrease_then_increase_to_same_value(client); let stmt = "SELECT toolkit_experimental.delta(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(0.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn delta_after_gauge_increase_then_decrease_to_same_value() { Spi::connect_mut(|client| { increase_then_decrease_to_same_value(client); let stmt = "SELECT toolkit_experimental.delta(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(0.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn idelta_left_after_gauge_decrease() { Spi::connect_mut(|client| { decrease(client); let stmt = "SELECT toolkit_experimental.idelta_left(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(10.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn idelta_left_after_gauge_increase() { Spi::connect_mut(|client| { increase(client); let stmt = "SELECT toolkit_experimental.idelta_left(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(20.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn idelta_left_after_gauge_increase_then_decrease_to_same_value() { Spi::connect_mut(|client| { increase_then_decrease_to_same_value(client); let stmt = "SELECT toolkit_experimental.idelta_left(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(20.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn idelta_left_after_gauge_decrease_then_increase_to_same_value() { Spi::connect_mut(|client| { decrease_then_increase_to_same_value(client); let stmt = "SELECT toolkit_experimental.idelta_left(toolkit_experimental.gauge_agg(ts, val)) FROM test"; assert_eq!(10.0, select_one!(client, stmt, f64)); }); } #[pg_test] fn idelta_right_after_gauge_decrease() { Spi::connect_mut(|client| { decrease(client); let stmt = "SELECT toolkit_experimental.idelta_right(toolkit_experimental.gauge_agg(ts, 
val)) FROM test";
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_gauge_increase() {
        Spi::connect_mut(|client| {
            increase(client);
            let stmt = "SELECT toolkit_experimental.idelta_right(toolkit_experimental.gauge_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_gauge_increase_then_decrease_to_same_value() {
        Spi::connect_mut(|client| {
            increase_then_decrease_to_same_value(client);
            let stmt = "SELECT toolkit_experimental.idelta_right(toolkit_experimental.gauge_agg(ts, val)) FROM test";
            assert_eq!(10.0, select_one!(client, stmt, f64));
        });
    }

    #[pg_test]
    fn idelta_right_after_gauge_decrease_then_increase_to_same_value() {
        Spi::connect_mut(|client| {
            decrease_then_increase_to_same_value(client);
            let stmt = "SELECT toolkit_experimental.idelta_right(toolkit_experimental.gauge_agg(ts, val)) FROM test";
            assert_eq!(20.0, select_one!(client, stmt, f64));
        });
    }

    // TODO 3rd copy of this...
    #[track_caller]
    fn assert_close_enough(p1: &MetricSummary, p2: &MetricSummary) {
        assert_eq!(p1.first, p2.first, "first");
        assert_eq!(p1.second, p2.second, "second");
        assert_eq!(p1.penultimate, p2.penultimate, "penultimate");
        assert_eq!(p1.last, p2.last, "last");
        assert_eq!(p1.num_changes, p2.num_changes, "num_changes");
        assert_eq!(p1.num_resets, p2.num_resets, "num_resets");
        assert_eq!(p1.stats.n, p2.stats.n, "n");
        use approx::assert_relative_eq;
        assert_relative_eq!(p1.stats.sx, p2.stats.sx);
        assert_relative_eq!(p1.stats.sx2, p2.stats.sx2);
        assert_relative_eq!(p1.stats.sy, p2.stats.sy);
        assert_relative_eq!(p1.stats.sy2, p2.stats.sy2);
        assert_relative_eq!(p1.stats.sxy, p2.stats.sxy);
    }

    #[pg_test]
    fn rollup() {
        Spi::connect_mut(|client| {
            // needed so that GaugeSummary type can be resolved
            let sp = client
                .update(
                    "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            client
                .update(&format!("SET LOCAL search_path TO {sp}"), None, &[])
                .unwrap();
            client
                .update(
                    "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();

            // This tests GaugeSummary::single_value - the old first == last
            // check erroneously saw 21.0 == 21.0 and called it a single value.
            let stmt = "INSERT INTO test VALUES('2020-01-01 00:00:00+00', 10.0), ('2020-01-01 00:01:00+00', 21.0), ('2020-01-01 00:01:00+00', 22.0), ('2020-01-01 00:01:00+00', 21.0)";
            client.update(stmt, None, &[]).unwrap();

            let stmt = "INSERT INTO test VALUES('2020-01-01 00:02:00+00', 10.0), ('2020-01-01 00:03:00+00', 20.0), ('2020-01-01 00:04:00+00', 10.0)";
            client.update(stmt, None, &[]).unwrap();

            let stmt = "INSERT INTO test VALUES('2020-01-01 00:08:00+00', 30.0), ('2020-01-01 00:10:00+00', 30.0), ('2020-01-01 00:10:30+00', 10.0), ('2020-01-01 00:20:00+00', 40.0)";
            client.update(stmt, None, &[]).unwrap();

            //combine function works as expected
            let stmt = "SELECT gauge_agg(ts, val) FROM test";
            let a = select_one!(client, stmt, GaugeSummary);

            let stmt = "WITH t as (SELECT date_trunc('minute', ts), gauge_agg(ts, val) as agg FROM test group by 1 ) SELECT rollup(agg) FROM t";
            let b = select_one!(client, stmt, GaugeSummary);

            assert_close_enough(&a.into(), &b.into());
        });
    }

    #[pg_test]
    fn gauge_agg_interpolation() {
        Spi::connect_mut(|client| {
            client.update(
                "CREATE TABLE test(time timestamptz, value double precision, bucket timestamptz)",
                None,
                &[]
            ).unwrap();
            client
                .update(
                    r#"INSERT INTO test VALUES
                        ('2020-1-1 10:00'::timestamptz, 10.0, '2020-1-1'::timestamptz),
                        ('2020-1-1 12:00'::timestamptz, 40.0, '2020-1-1'::timestamptz),
                        ('2020-1-1 16:00'::timestamptz, 20.0, '2020-1-1'::timestamptz),
                        ('2020-1-2 2:00'::timestamptz, 15.0, '2020-1-2'::timestamptz),
                        ('2020-1-2 12:00'::timestamptz, 50.0, '2020-1-2'::timestamptz),
                        ('2020-1-2 20:00'::timestamptz, 25.0, '2020-1-2'::timestamptz),
                        ('2020-1-3 4:00'::timestamptz, 30.0, '2020-1-3'::timestamptz),
                        ('2020-1-3 12:00'::timestamptz, 0.0, '2020-1-3'::timestamptz),
('2020-1-3 16:00'::timestamptz, 35.0, '2020-1-3'::timestamptz)"#, None, &[], ) .unwrap(); let mut deltas = client .update( r#"SELECT toolkit_experimental.interpolated_delta( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, toolkit_experimental.gauge_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); // Day 1, start at 10, interpolated end of day is 16 assert_eq!(deltas.next().unwrap()[1].value().unwrap(), Some(16. - 10.)); // Day 2, interpolated start is 16, interpolated end is 27.5 assert_eq!(deltas.next().unwrap()[1].value().unwrap(), Some(27.5 - 16.)); // Day 3, interpolated start is 27.5, end is 35 assert_eq!(deltas.next().unwrap()[1].value().unwrap(), Some(35. - 27.5)); let mut rates = client .update( r#"SELECT toolkit_experimental.interpolated_rate( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, toolkit_experimental.gauge_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); // Day 1, 14 hours (rate is per second) assert_eq!( rates.next().unwrap()[1].value().unwrap(), Some((16. - 10.) / (14. * 60. * 60.)) ); // Day 2, 24 hours assert_eq!( rates.next().unwrap()[1].value().unwrap(), Some((27.5 - 16.) / (24. * 60. * 60.)) ); // Day 3, 16 hours assert_eq!( rates.next().unwrap()[1].value().unwrap(), Some((35. - 27.5) / (16. * 60. 
* 60.)) ); }); } #[pg_test] fn guage_agg_interpolated_delta_with_aligned_point() { Spi::connect_mut(|client| { client.update( "CREATE TABLE test(time timestamptz, value double precision, bucket timestamptz)", None, &[] ).unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-1-1 10:00'::timestamptz, 10.0, '2020-1-1'::timestamptz), ('2020-1-1 12:00'::timestamptz, 40.0, '2020-1-1'::timestamptz), ('2020-1-1 16:00'::timestamptz, 20.0, '2020-1-1'::timestamptz), ('2020-1-2 0:00'::timestamptz, 15.0, '2020-1-2'::timestamptz), ('2020-1-2 12:00'::timestamptz, 50.0, '2020-1-2'::timestamptz), ('2020-1-2 20:00'::timestamptz, 25.0, '2020-1-2'::timestamptz)"#, None, &[], ) .unwrap(); let mut deltas = client .update( r#"SELECT toolkit_experimental.interpolated_delta( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, toolkit_experimental.gauge_agg(time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); // Day 1, start at 10, interpolated end of day is 15 (after reset) assert_eq!(deltas.next().unwrap()[1].value().unwrap(), Some(15. - 10.)); // Day 2, start is 15, end is 25 assert_eq!(deltas.next().unwrap()[1].value().unwrap(), Some(25. 
- 15.));
            assert!(deltas.next().is_none());
        });
    }

    #[pg_test]
    fn no_results_on_null_input() {
        Spi::connect_mut(|client| {
            // needed so that GaugeSummary type can be resolved
            let sp = client
                .update(
                    "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            client
                .update(&format!("SET LOCAL search_path TO {sp}"), None, &[])
                .unwrap();
            client
                .update(
                    "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            let stmt = "INSERT INTO test VALUES (NULL, NULL)";
            client.update(stmt, None, &[]).unwrap();

            let stmt = "SELECT toolkit_experimental.gauge_agg(ts, val) FROM test";
            // NOTE(review): the turbofish type was stripped in this copy;
            // GaugeSummary restored from context — confirm against upstream.
            assert!(client
                .update(stmt, None, &[])
                .unwrap()
                .first()
                .get_one::<GaugeSummary>()
                .unwrap()
                .is_none());
        });
    }
}


================================================
FILE: extension/src/heartbeat_agg/accessors.rs
================================================
use pgrx::*;

use crate::{
    flatten,
    heartbeat_agg::{HeartbeatAgg, HeartbeatAggData},
    pg_type, ron_inout_funcs,
};

/// Zero-width placeholder aggregate used when an interpolation accessor is
/// constructed without a predecessor.
fn empty_agg<'a>() -> HeartbeatAgg<'a> {
    unsafe {
        flatten!(HeartbeatAgg {
            start_time: 0,
            end_time: 0,
            last_seen: 0,
            interval_len: 0,
            num_intervals: 0,
            interval_starts: vec!().into(),
            interval_ends: vec!().into(),
        })
    }
}

pg_type! {
    struct HeartbeatInterpolatedUptimeAccessor<'input> {
        has_prev : u64,
        prev : HeartbeatAggData<'input>,
    }
}

ron_inout_funcs!(HeartbeatInterpolatedUptimeAccessor<'input>);

#[pg_extern(immutable, parallel_safe, name = "interpolated_uptime")]
fn heartbeat_agg_interpolated_uptime_accessor<'a>(
    prev: Option<HeartbeatAgg<'a>>,
) -> HeartbeatInterpolatedUptimeAccessor<'a> {
    let has_prev = u64::from(prev.is_some());
    let prev = prev.unwrap_or_else(empty_agg).0;
    crate::build! {
        HeartbeatInterpolatedUptimeAccessor {
            has_prev,
            prev,
        }
    }
}

impl<'a> HeartbeatInterpolatedUptimeAccessor<'a> {
    /// The predecessor aggregate, or None when constructed without one.
    pub fn pred(&self) -> Option<HeartbeatAgg<'a>> {
        if self.has_prev == 0 {
            None
        } else {
            Some(self.prev.clone().into())
        }
    }
}

pg_type! {
    struct HeartbeatInterpolatedDowntimeAccessor<'input> {
        has_prev : u64,
        prev : HeartbeatAggData<'input>,
    }
}

ron_inout_funcs!(HeartbeatInterpolatedDowntimeAccessor<'input>);

#[pg_extern(immutable, parallel_safe, name = "interpolated_downtime")]
fn heartbeat_agg_interpolated_downtime_accessor<'a>(
    prev: Option<HeartbeatAgg<'a>>,
) -> HeartbeatInterpolatedDowntimeAccessor<'a> {
    let has_prev = u64::from(prev.is_some());
    let prev = prev.unwrap_or_else(empty_agg).0;
    crate::build! {
        HeartbeatInterpolatedDowntimeAccessor {
            has_prev,
            prev,
        }
    }
}

impl<'a> HeartbeatInterpolatedDowntimeAccessor<'a> {
    /// The predecessor aggregate, or None when constructed without one.
    pub fn pred(&self) -> Option<HeartbeatAgg<'a>> {
        if self.has_prev == 0 {
            None
        } else {
            Some(self.prev.clone().into())
        }
    }
}

pg_type! {
    struct HeartbeatInterpolateAccessor<'input> {
        has_prev : u64,
        prev : HeartbeatAggData<'input>,
    }
}

ron_inout_funcs!(HeartbeatInterpolateAccessor<'input>);

#[pg_extern(immutable, parallel_safe, name = "interpolate")]
fn heartbeat_agg_interpolate_accessor<'a>(
    prev: Option<HeartbeatAgg<'a>>,
) -> HeartbeatInterpolateAccessor<'a> {
    let has_prev = u64::from(prev.is_some());
    let prev = prev.unwrap_or_else(empty_agg).0;
    crate::build! {
        HeartbeatInterpolateAccessor {
            has_prev,
            prev,
        }
    }
}

impl<'a> HeartbeatInterpolateAccessor<'a> {
    /// The predecessor aggregate, or None when constructed without one.
    pub fn pred(&self) -> Option<HeartbeatAgg<'a>> {
        if self.has_prev == 0 {
            None
        } else {
            Some(self.prev.clone().into())
        }
    }
}

pg_type! {
    struct HeartbeatTrimToAccessor {
        start : i64,
        end : i64,
    }
}

ron_inout_funcs!(HeartbeatTrimToAccessor);

// Note that this is unable to take only a duration, as we don't have the functionality to store
// an interval in PG format and are unable to convert it to an int without a reference time.
// This is a difference from the inline function.
#[pg_extern(immutable, parallel_safe, name = "trim_to")] fn heartbeat_agg_trim_to_accessor( start: crate::raw::TimestampTz, duration: default!(Option, "NULL"), ) -> HeartbeatTrimToAccessor { let end = duration .map(|intv| crate::datum_utils::ts_interval_sum_to_ms(&start, &intv)) .unwrap_or(0); let start = i64::from(start); crate::build! { HeartbeatTrimToAccessor { start, end, } } } ================================================ FILE: extension/src/heartbeat_agg.rs ================================================ use pgrx::iter::TableIterator; use pgrx::*; use crate::{ accessors::{ AccessorDeadRanges, AccessorDowntime, AccessorLiveAt, AccessorLiveRanges, AccessorNumGaps, AccessorNumLiveRanges, AccessorUptime, }, aggregate_utils::in_aggregate_context, datum_utils::interval_to_ms, flatten, palloc::{Inner, InternalAsValue, ToInternal}, pg_type, raw::{Interval, TimestampTz}, ron_inout_funcs, }; use std::cmp::{max, min}; mod accessors; use accessors::{ HeartbeatInterpolateAccessor, HeartbeatInterpolatedDowntimeAccessor, HeartbeatInterpolatedUptimeAccessor, HeartbeatTrimToAccessor, }; const BUFFER_SIZE: usize = 1000; // How many values to absorb before consolidating // Given the lack of a good range map class, or efficient predecessor operation on btrees, // the trans state will simply collect points and then process them in batches pub struct HeartbeatTransState { start: i64, end: i64, last: i64, interval_len: i64, buffer: Vec, liveness: Vec<(i64, i64)>, // sorted array of non-overlapping (start_time, end_time) } impl HeartbeatTransState { pub fn new(start: i64, end: i64, interval: i64) -> Self { assert!(end - start > interval, "all points passed to heartbeat agg must occur in the 'agg_duration' interval after 'agg_start'"); HeartbeatTransState { start, end, last: i64::MIN, interval_len: interval, buffer: vec![], liveness: vec![], } } pub fn insert(&mut self, time: i64) { assert!(time >= self.start && time < self.end, "all points passed to heartbeat agg must occur in 
the 'agg_duration' interval after 'agg_start'"); if self.buffer.len() >= BUFFER_SIZE { self.process_batch(); } self.buffer.push(time); } pub fn process_batch(&mut self) { if self.buffer.is_empty() { return; } self.buffer.sort_unstable(); if self.last < *self.buffer.last().unwrap() { self.last = *self.buffer.last().unwrap(); } let mut new_intervals = vec![]; let mut start = *self.buffer.first().unwrap(); let mut bound = start + self.interval_len; for heartbeat in std::mem::take(&mut self.buffer).into_iter() { if heartbeat <= bound { bound = heartbeat + self.interval_len; } else { new_intervals.push((start, bound)); start = heartbeat; bound = start + self.interval_len; } } new_intervals.push((start, bound)); if self.liveness.is_empty() { std::mem::swap(&mut self.liveness, &mut new_intervals); } else { self.combine_intervals(new_intervals) } } // In general we shouldn't need to change these creation time parameters, but if // we're combining with another interval this may be necessary. fn extend_covered_interval(&mut self, new_start: i64, new_end: i64) { assert!(new_start <= self.start && new_end >= self.end); // this is guaranteed by the combine function self.start = new_start; // extend last range if able if self.end < new_end && self.last + self.interval_len > self.end { assert!(!self.liveness.is_empty()); // above condition should be impossible without liveness data let last_interval = self.liveness.last_mut().unwrap(); last_interval.1 = min(self.last + self.interval_len, new_end); } self.end = new_end; } fn combine_intervals(&mut self, new_intervals: Vec<(i64, i64)>) { // Optimized path for nonoverlapping, ordered inputs if self.last < new_intervals.first().unwrap().0 { let mut new_intervals = new_intervals.into_iter(); // Grab the first new interval to check for overlap with the existing data let first_new = new_intervals.next().unwrap(); if self.liveness.last().unwrap().1 >= first_new.0 { // Note that the bound of the new interval must be >= the old bound 
self.liveness.last_mut().unwrap().1 = first_new.1; } else { self.liveness.push(first_new); } for val in new_intervals { self.liveness.push(val); } return; } let new_intervals = new_intervals.into_iter(); let old_intervals = std::mem::take(&mut self.liveness).into_iter(); // In the following while let block, test and control are used to track our two interval iterators. // We will swap them back and forth to try to keep control as the iterator which has provided the current bound. let mut test = new_intervals.peekable(); let mut control = old_intervals.peekable(); while let Some(interval) = if let Some((start1, _)) = control.peek() { if let Some((start2, _)) = test.peek() { let (start, mut bound) = if start1 < start2 { control.next().unwrap() } else { std::mem::swap(&mut test, &mut control); control.next().unwrap() }; while test.peek().is_some() && test.peek().unwrap().0 <= bound { let (_, new_bound) = test.next().unwrap(); if new_bound > bound { std::mem::swap(&mut test, &mut control); bound = new_bound; } } Some((start, bound)) } else { control.next() } } else { test.next() } { self.liveness.push(interval) } } pub fn combine(&mut self, mut other: HeartbeatTransState) { assert!(self.interval_len == other.interval_len); // Nicer error would be nice here self.process_batch(); other.process_batch(); let min_start = min(self.start, other.start); let max_end = max(self.end, other.end); self.extend_covered_interval(min_start, max_end); other.extend_covered_interval(min_start, max_end); self.combine_intervals(other.liveness); self.last = max(self.last, other.last); } } #[cfg(any(test, feature = "pg_test"))] impl HeartbeatTransState { pub fn get_buffer(&self) -> &Vec { &self.buffer } pub fn get_liveness(&self) -> &Vec<(i64, i64)> { &self.liveness } } pg_type! 
{
    // Covered range [start_time, end_time], last heartbeat seen, liveness
    // window length, and the sorted live intervals as parallel arrays.
    #[derive(Debug)]
    struct HeartbeatAgg<'input> {
        start_time : i64,
        end_time : i64,
        last_seen : i64,
        interval_len : i64,
        num_intervals : u64,
        interval_starts : [i64; self.num_intervals],
        interval_ends : [i64; self.num_intervals],
    }
}

ron_inout_funcs!(HeartbeatAgg<'input>);

impl HeartbeatAgg<'_> {
    /// Restrict the aggregate to a narrower [start, end] window, clipping the
    /// boundary intervals and recomputing last_seen when the end is trimmed.
    /// Errors if either bound would widen the original range.
    fn trim_to(self, start: Option, end: Option) -> HeartbeatAgg<'static> {
        if (start.is_some() && start.unwrap() < self.start_time)
            || (end.is_some() && end.unwrap() > self.end_time)
        {
            error!("Can not query beyond the original aggregate bounds");
        }
        let mut starts: Vec = vec![];
        let mut ends: Vec = vec![];
        for i in 0..self.num_intervals as usize {
            starts.push(self.interval_starts.slice()[i]);
            ends.push(self.interval_ends.slice()[i]);
        }
        // Index of the first interval ending at or after the new start; clip
        // that interval's start if it begins earlier.
        let low_idx = if let Some(start) = start {
            let mut idx = 0;
            while idx < self.num_intervals as usize && ends[idx] < start {
                idx += 1;
            }
            // NOTE(review): if every interval ends before `start`, idx equals
            // num_intervals here and the indexing below panics — TODO confirm
            // callers cannot hit this.
            if starts[idx] < start {
                starts[idx] = start;
            }
            idx
        } else {
            0
        };
        let mut new_last = None;
        // Index of the last interval starting at or before the new end; clip
        // that interval's end and derive the new last_seen from it.
        let high_idx = if let Some(end) = end {
            if self.num_intervals > 0 {
                let mut idx = self.num_intervals as usize - 1;
                while idx > low_idx && starts[idx] > end {
                    idx -= 1;
                }
                new_last = Some(ends[idx] - self.interval_len);
                if ends[idx] > end {
                    if end < new_last.unwrap() {
                        new_last = Some(end);
                    }
                    ends[idx] = end;
                }
                idx
            } else {
                // NOTE(review): with num_intervals == 0 this underflows
                // (panics in debug builds) — presumably unreachable; verify.
                self.num_intervals as usize - 1
            }
        } else {
            self.num_intervals as usize - 1
        };
        unsafe {
            flatten!(HeartbeatAgg {
                start_time: start.unwrap_or(self.start_time),
                end_time: end.unwrap_or(self.end_time),
                last_seen: new_last.unwrap_or(self.last_seen),
                interval_len: self.interval_len,
                num_intervals: (high_idx - low_idx + 1) as u64,
                interval_starts: starts[low_idx..=high_idx].into(),
                interval_ends: ends[low_idx..=high_idx].into(),
            })
        }
    }

    /// Sum of the lengths of all live intervals.
    fn sum_live_intervals(self) -> i64 {
        let starts = self.interval_starts.as_slice();
        let ends = self.interval_ends.as_slice();
        let mut sum = 0;
        for i in 0..self.num_intervals as usize {
            sum += ends[i] - starts[i];
        }
        sum
    }

    /// Extend this aggregate's liveness backward to its own start using the
    /// last heartbeat of a preceding, non-overlapping aggregate.
    fn interpolate_start(&mut self, pred: &Self) {
        // only allow interpolation of non-overlapping ranges
        assert!(pred.end_time <= self.start_time);
        let pred_end = pred.last_seen + self.interval_len;
        // Predecessor's final heartbeat expired before this aggregate began.
        if pred_end <= self.start_time {
            return;
        }
        // If first range already covers (start_time, pred_end) return
        if self
            .interval_starts
            .as_slice()
            .first()
            .filter(|v| **v == self.start_time)
            .is_some()
            && self
                .interval_ends
                .as_slice()
                .first()
                .filter(|v| **v >= pred_end)
                .is_some()
        {
            return;
        }
        if self
            .interval_starts
            .as_slice()
            .first()
            .filter(|v| **v <= pred_end)
            .is_some()
        {
            // First interval overlaps the carried-over liveness: move its start.
            self.interval_starts.as_owned()[0] = self.start_time;
        } else {
            // Otherwise prepend a new interval for the carried-over liveness.
            let start = self.start_time;
            self.interval_starts.as_owned().insert(0, start);
            self.interval_ends.as_owned().insert(0, pred_end);
            self.num_intervals += 1;
        }
    }
}

/// SQL accessor: the live (start, end) ranges of the aggregate.
#[pg_extern]
pub fn live_ranges(
    agg: HeartbeatAgg<'static>,
) -> TableIterator<'static, (name!(start, TimestampTz), name!(end, TimestampTz))> {
    let starts = agg.interval_starts.clone();
    let ends = agg.interval_ends.clone();
    TableIterator::new(
        starts
            .into_iter()
            .map(|x| x.into())
            .zip(ends.into_iter().map(|x| x.into())),
    )
}

// `agg -> live_ranges()` arrow-operator form of the accessor above.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_live_ranges(
    sketch: HeartbeatAgg<'static>,
    _accessor: AccessorLiveRanges,
) -> TableIterator<'static, (name!(start, TimestampTz), name!(end, TimestampTz))> {
    live_ranges(sketch)
}

/// SQL accessor: the dead (start, end) ranges of the aggregate.
#[pg_extern]
pub fn dead_ranges(
    agg: HeartbeatAgg<'static>,
) -> TableIterator<'static, (name!(start, TimestampTz), name!(end, TimestampTz))> {
    // No heartbeats at all: the entire covered range is one dead range.
    if agg.num_intervals == 0 {
        return TableIterator::new(std::iter::once((
            agg.start_time.into(),
            agg.end_time.into(),
        )));
    }
    // Dead ranges are the opposite of the intervals stored in the aggregate
    let mut starts = agg.interval_ends.clone().into_vec();
    let mut ends = agg.interval_starts.clone().into_vec();
    // Fix the first point depending on whether the aggregate starts in a live or dead range
    if ends[0] == agg.start_time {
        ends.remove(0);
    } else {
        starts.insert(0, agg.start_time);
    }
    // Fix the last point depending on whether the aggregate starts in a live or dead range
    if *starts.last().unwrap() == agg.end_time {
        starts.pop();
    } else {
        ends.push(agg.end_time);
    }
    TableIterator::new(
        starts
            .into_iter()
            .map(|x| x.into())
            .zip(ends.into_iter().map(|x| x.into())),
    )
}

// `agg -> dead_ranges()` arrow-operator form of the accessor above.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_dead_ranges(
    sketch: HeartbeatAgg<'static>,
    _accessor: AccessorDeadRanges,
) -> TableIterator<'static, (name!(start, TimestampTz), name!(end, TimestampTz))> {
    dead_ranges(sketch)
}

/// Total live time within the covered range, as an Interval.
#[pg_extern]
pub fn uptime(agg: HeartbeatAgg<'static>) -> Interval {
    agg.sum_live_intervals().into()
}

// `agg -> uptime()` arrow-operator form.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_uptime(
    sketch: HeartbeatAgg<'static>,
    _accessor: AccessorUptime,
) -> Interval {
    uptime(sketch)
}

/// Uptime after first interpolating the aggregate's start from a predecessor.
#[pg_extern]
pub fn interpolated_uptime(
    agg: HeartbeatAgg<'static>,
    pred: Option>,
) -> Interval {
    uptime(interpolate_heartbeat_agg(agg, pred))
}

// `agg -> interpolated_uptime(pred)` arrow-operator form.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_interpolated_uptime(
    sketch: HeartbeatAgg<'static>,
    accessor: HeartbeatInterpolatedUptimeAccessor<'static>,
) -> Interval {
    interpolated_uptime(sketch, accessor.pred())
}

/// Total dead time within the covered range (range length minus uptime).
#[pg_extern]
pub fn downtime(agg: HeartbeatAgg<'static>) -> Interval {
    (agg.end_time - agg.start_time - agg.sum_live_intervals()).into()
}

// `agg -> downtime()` arrow-operator form.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_downtime(
    sketch: HeartbeatAgg<'static>,
    _accessor: AccessorDowntime,
) -> Interval {
    downtime(sketch)
}

/// Downtime after first interpolating the aggregate's start from a predecessor.
#[pg_extern]
pub fn interpolated_downtime(
    agg: HeartbeatAgg<'static>,
    pred: Option>,
) -> Interval {
    downtime(interpolate_heartbeat_agg(agg, pred))
}

// `agg -> interpolated_downtime(pred)` arrow-operator form.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_interpolated_downtime(
    sketch: HeartbeatAgg<'static>,
    accessor: HeartbeatInterpolatedDowntimeAccessor<'static>,
) -> Interval {
    interpolated_downtime(sketch, accessor.pred())
}

/// Whether the system was live at the given instant.  Intervals are treated
/// half-open: live at their start (`test < val` fails on equality), dead at
/// their exact end (`test < ends[idx]`).  Errors outside the covered range.
#[pg_extern]
pub fn live_at(agg: HeartbeatAgg<'static>, test: TimestampTz) -> bool {
    let test = i64::from(test);
    if test < agg.start_time || test > agg.end_time {
        error!("unable to test for liveness outside of a heartbeat_agg's covered range")
    }
    if agg.num_intervals == 0 {
        return false;
    }
    let mut start_iter = agg.interval_starts.iter().enumerate().peekable();
    while let Some((idx, val)) = start_iter.next() {
        if test < val {
            // Only possible if test shows up before first interval
            return false;
        }
        if let Some((_, next_val)) = start_iter.peek() {
            if test < *next_val {
                return test < agg.interval_ends.as_slice()[idx];
            }
        }
    }
    // Fall out the loop if test > start of last interval
    test < *agg.interval_ends.as_slice().last().unwrap()
}

// `agg -> live_at(ts)` arrow-operator form; the accessor carries the instant.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_live_at(
    sketch: HeartbeatAgg<'static>,
    accessor: AccessorLiveAt,
) -> bool {
    let ts = TimestampTz(accessor.time.into());
    live_at(sketch, ts)
}

/// SQL `interpolate(agg, pred)`: returns a copy of `agg` whose start has been
/// interpolated from the optional predecessor aggregate.
#[pg_extern(name = "interpolate")]
fn interpolate_heartbeat_agg(
    agg: HeartbeatAgg<'static>,
    pred: Option>,
) -> HeartbeatAgg<'static> {
    let mut r = agg.clone();
    if let Some(pred) = pred {
        r.interpolate_start(&pred);
    }
    r
}

// `agg -> interpolate(pred)` arrow-operator form.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_interpolate(
    sketch: HeartbeatAgg<'static>,
    accessor: HeartbeatInterpolateAccessor<'static>,
) -> HeartbeatAgg<'static> {
    interpolate_heartbeat_agg(sketch, accessor.pred())
}

/// Number of live ranges stored in the aggregate.
#[pg_extern]
pub fn num_live_ranges(agg: HeartbeatAgg<'static>) -> i64 {
    agg.num_intervals as i64
}

// `agg -> num_live_ranges()` arrow-operator form.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_num_live_ranges(
    agg: HeartbeatAgg<'static>,
    _accessor: AccessorNumLiveRanges,
) -> i64 {
    num_live_ranges(agg)
}

/// Number of dead ranges (gaps) in the aggregate.
#[pg_extern]
pub fn num_gaps(agg: HeartbeatAgg<'static>) -> i64 {
    // No live intervals: the whole covered range is one gap.
    if agg.num_intervals == 0 {
        return 1;
    }
    // One gap between each pair of adjacent live intervals...
    let mut count = agg.num_intervals - 1;
    // ...plus a leading/trailing gap when the range doesn't start/end live.
    if agg.interval_starts.slice()[0] != agg.start_time {
        count += 1;
    }
    if agg.interval_ends.slice()[agg.num_intervals as usize - 1] != agg.end_time {
        count += 1;
    }
    count as i64
}
// `agg -> num_gaps()` arrow-operator form of the accessor above.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_num_gaps(agg: HeartbeatAgg<'static>, _accessor: AccessorNumGaps) -> i64 {
    num_gaps(agg)
}

/// SQL-facing trim: restrict an aggregate to a (start, duration) window.
/// NULL start keeps the aggregate's own start; NULL duration keeps its end.
#[pg_extern]
pub fn trim_to(
    agg: HeartbeatAgg<'static>,
    start: default!(Option, "NULL"),
    duration: default!(Option, "NULL"),
) -> HeartbeatAgg<'static> {
    if let Some(start) = start {
        // End bound is start + duration when a duration is given.
        let end = duration.map(|intv| crate::datum_utils::ts_interval_sum_to_ms(&start, &intv));
        agg.trim_to(Some(i64::from(start)), end)
    } else {
        // No explicit start: measure the duration from the aggregate's own start.
        let end = duration.map(|intv| {
            crate::datum_utils::ts_interval_sum_to_ms(&TimestampTz::from(agg.start_time), &intv)
        });
        agg.trim_to(None, end)
    }
}

// `agg -> trim_to(...)` arrow-operator form; an accessor end of 0 encodes "no end bound".
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_heartbeat_agg_trim_to(
    agg: HeartbeatAgg<'static>,
    accessor: HeartbeatTrimToAccessor,
) -> HeartbeatAgg<'static> {
    let end = if accessor.end == 0 {
        None
    } else {
        Some(accessor.end)
    };
    agg.trim_to(Some(accessor.start), end)
}

// Rebuild a transient state from a finalized aggregate (used by rollup).
impl From> for HeartbeatTransState {
    fn from(agg: HeartbeatAgg<'static>) -> Self {
        HeartbeatTransState {
            start: agg.start_time,
            end: agg.end_time,
            last: agg.last_seen,
            interval_len: agg.interval_len,
            buffer: vec![],
            liveness: agg
                .interval_starts
                .iter()
                .zip(agg.interval_ends.iter())
                .collect(),
        }
    }
}

/// Aggregate transition function for heartbeat_agg (SQL-visible wrapper).
#[pg_extern(immutable, parallel_safe)]
pub fn heartbeat_trans(
    state: Internal,
    heartbeat: TimestampTz,
    start: TimestampTz,
    length: Interval,
    liveness_duration: Interval,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option {
    heartbeat_trans_inner(
        unsafe { state.to_inner() },
        heartbeat,
        start,
        length,
        liveness_duration,
        fcinfo,
    )
    .internal()
}

/// Transition-function body: lazily build the state on first call, then
/// buffer the heartbeat timestamp into it.
pub fn heartbeat_trans_inner(
    state: Option>,
    heartbeat: TimestampTz,
    start: TimestampTz,
    length: Interval,
    liveness_duration: Interval,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let mut state = state.unwrap_or_else(|| {
                let length = interval_to_ms(&start, &length);
                let interval = interval_to_ms(&start, &liveness_duration);
                let start = start.into();
                HeartbeatTransState::new(start, start + length, interval).into()
            });
            state.insert(heartbeat.into());
            Some(state)
        })
    }
}

/// Aggregate final function (SQL-visible wrapper).
#[pg_extern(immutable, parallel_safe)]
pub fn heartbeat_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    heartbeat_final_inner(unsafe { state.to_inner() }, fcinfo)
}

/// Final-function body: flush buffered heartbeats and freeze the state into a
/// HeartbeatAgg, clamping the last interval to the aggregate's end.
pub fn heartbeat_final_inner(
    state: Option>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            state.map(|mut s| {
                s.process_batch();
                let (starts, mut ends): (Vec, Vec) =
                    s.liveness.clone().into_iter().unzip();
                // Trim last interval to end of aggregate's range
                if let Some(last) = ends.last_mut() {
                    if *last > s.end {
                        *last = s.end;
                    }
                }
                flatten!(HeartbeatAgg {
                    start_time: s.start,
                    end_time: s.end,
                    last_seen: s.last,
                    interval_len: s.interval_len,
                    num_intervals: starts.len() as u64,
                    interval_starts: starts.into(),
                    interval_ends: ends.into(),
                })
            })
        })
    }
}

/// Rollup transition function (SQL-visible wrapper).
#[pg_extern(immutable, parallel_safe)]
pub fn heartbeat_rollup_trans(
    state: Internal,
    value: Option>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option {
    heartbeat_rollup_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

/// Rollup transition body: fold each incoming aggregate into the running state.
pub fn heartbeat_rollup_trans_inner(
    state: Option>,
    value: Option>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, value) {
            // NULL input leaves the state untouched.
            (a, None) => a,
            // First aggregate seeds the state.
            (None, Some(a)) => Some(HeartbeatTransState::from(a).into()),
            // Subsequent aggregates are combined in.
            (Some(mut a), Some(b)) => {
                a.combine(b.into());
                Some(a)
            }
        })
    }
}

// SQL definition of the user-facing heartbeat_agg aggregate.
extension_sql!(
    "\n\
    CREATE AGGREGATE heartbeat_agg(\n\
    heartbeat TIMESTAMPTZ, agg_start TIMESTAMPTZ, agg_duration INTERVAL, heartbeat_liveness INTERVAL\n\
    ) (\n\
    sfunc = heartbeat_trans,\n\
    stype = internal,\n\
    finalfunc = heartbeat_final\n\
    );\n\
    ",
    name = "heartbeat_agg",
    requires = [heartbeat_trans, heartbeat_final,],
);

// SQL definition of rollup() over heartbeat aggregates.
extension_sql!(
    "\n\
    CREATE AGGREGATE rollup(\n\
    HeartbeatAgg\n\
    ) (\n\
    sfunc = heartbeat_rollup_trans,\n\
    stype = internal,\n\
    finalfunc = heartbeat_final\n\
    );\n\
    ",
    name =
"heartbeat_agg_rollup", requires = [heartbeat_rollup_trans, heartbeat_final,], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; #[pg_test] pub fn test_heartbeat_trans_state() { let mut state = HeartbeatTransState::new(0, 500, 10); state.insert(100); state.insert(200); state.insert(250); state.insert(220); state.insert(210); state.insert(300); assert_eq!(state.get_buffer().len(), 6); state.process_batch(); assert_eq!(state.get_buffer().len(), 0); let mut it = state.get_liveness().iter(); assert_eq!(*it.next().unwrap(), (100, 110)); assert_eq!(*it.next().unwrap(), (200, 230)); assert_eq!(*it.next().unwrap(), (250, 260)); assert_eq!(*it.next().unwrap(), (300, 310)); assert!(it.next().is_none()); state.insert(400); state.insert(350); state.process_batch(); let mut it = state.get_liveness().iter(); assert_eq!(*it.next().unwrap(), (100, 110)); assert_eq!(*it.next().unwrap(), (200, 230)); assert_eq!(*it.next().unwrap(), (250, 260)); assert_eq!(*it.next().unwrap(), (300, 310)); assert_eq!(*it.next().unwrap(), (350, 360)); assert_eq!(*it.next().unwrap(), (400, 410)); assert!(it.next().is_none()); state.insert(80); state.insert(190); state.insert(210); state.insert(230); state.insert(240); state.insert(310); state.insert(395); state.insert(408); state.process_batch(); let mut it = state.get_liveness().iter(); assert_eq!(*it.next().unwrap(), (80, 90)); assert_eq!(*it.next().unwrap(), (100, 110)); assert_eq!(*it.next().unwrap(), (190, 260)); assert_eq!(*it.next().unwrap(), (300, 320)); assert_eq!(*it.next().unwrap(), (350, 360)); assert_eq!(*it.next().unwrap(), (395, 418)); assert!(it.next().is_none()); } #[pg_test] pub fn test_heartbeat_agg() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update("CREATE TABLE liveness(heartbeat TIMESTAMPTZ)", None, &[]) .unwrap(); client .update( "INSERT INTO liveness VALUES ('01-01-2020 0:2:20 UTC'), ('01-01-2020 0:10 UTC'), ('01-01-2020 0:17 UTC'), ('01-01-2020 
0:30 UTC'), ('01-01-2020 0:35 UTC'), ('01-01-2020 0:40 UTC'), ('01-01-2020 0:35 UTC'), ('01-01-2020 0:40 UTC'), ('01-01-2020 0:40 UTC'), ('01-01-2020 0:50:30 UTC'), ('01-01-2020 1:00 UTC'), ('01-01-2020 1:08 UTC'), ('01-01-2020 1:18 UTC'), ('01-01-2020 1:28 UTC'), ('01-01-2020 1:38:01 UTC'), ('01-01-2020 1:40 UTC'), ('01-01-2020 1:40:01 UTC'), ('01-01-2020 1:50:01 UTC'), ('01-01-2020 1:57 UTC'), ('01-01-2020 1:59:50 UTC') ", None, &[], ) .unwrap(); let mut result = client.update( "SELECT live_ranges(heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m'))::TEXT FROM liveness", None, &[]).unwrap(); let mut arrow_result = client.update( "SELECT (heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') -> live_ranges())::TEXT FROM liveness", None, &[]).unwrap(); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 00:02:20+00\",\"2020-01-01 00:27:00+00\")" ); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 00:30:00+00\",\"2020-01-01 00:50:00+00\")" ); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 00:50:30+00\",\"2020-01-01 01:38:00+00\")" ); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 01:38:01+00\",\"2020-01-01 02:00:00+00\")" ); assert!(result.next().is_none()); assert!(arrow_result.next().is_none()); let mut result = client.update( "SELECT dead_ranges(heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m'))::TEXT FROM liveness", None, &[]).unwrap(); let mut arrow_result = client.update( "SELECT (heartbeat_agg(heartbeat, 
'01-01-2020 UTC', '2h', '10m') -> dead_ranges())::TEXT FROM liveness", None, &[]).unwrap(); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 00:00:00+00\",\"2020-01-01 00:02:20+00\")" ); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 00:27:00+00\",\"2020-01-01 00:30:00+00\")" ); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 00:50:00+00\",\"2020-01-01 00:50:30+00\")" ); let test = arrow_result.next().unwrap()[1] .value::() .unwrap() .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), test ); assert_eq!( test, "(\"2020-01-01 01:38:00+00\",\"2020-01-01 01:38:01+00\")" ); assert!(result.next().is_none()); assert!(arrow_result.next().is_none()); let result = client .update( "SELECT uptime(heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m'))::TEXT FROM liveness", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!("01:54:09", result); let result = client.update( "SELECT (heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') -> uptime())::TEXT FROM liveness", None, &[]).unwrap().first().get_one::().unwrap().unwrap(); assert_eq!("01:54:09", result); let result = client .update( "SELECT downtime(heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m'))::TEXT FROM liveness", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!("00:05:51", result); let result = client.update( "SELECT (heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') -> downtime())::TEXT FROM liveness", None, &[]).unwrap().first().get_one::().unwrap().unwrap(); assert_eq!("00:05:51", result); let (result1, 
result2, result3) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT live_at(agg, '01-01-2020 00:01:00 UTC')::TEXT, live_at(agg, '01-01-2020 00:05:00 UTC')::TEXT, live_at(agg, '01-01-2020 00:30:00 UTC')::TEXT FROM agg", None, &[]) .unwrap().first() .get_three::().unwrap(); let result4 = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT live_at(agg, '01-01-2020 01:38:00 UTC')::TEXT FROM agg", None, &[]) .unwrap().first() .get_one::().unwrap(); assert_eq!(result1.unwrap(), "false"); // outside ranges assert_eq!(result2.unwrap(), "true"); // inside ranges assert_eq!(result3.unwrap(), "true"); // first point of range assert_eq!(result4.unwrap(), "false"); // last point of range let (result1, result2, result3) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT (agg -> live_at('01-01-2020 00:01:00 UTC'))::TEXT, (agg -> live_at('01-01-2020 00:05:00 UTC'))::TEXT, (agg -> live_at('01-01-2020 00:30:00 UTC'))::TEXT FROM agg", None, &[]) .unwrap().first() .get_three::().unwrap(); let result4 = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT (agg -> live_at('01-01-2020 01:38:00 UTC'))::TEXT FROM agg", None, &[]) .unwrap().first() .get_one::().unwrap(); assert_eq!(result1.unwrap(), "false"); // outside ranges assert_eq!(result2.unwrap(), "true"); // inside ranges assert_eq!(result3.unwrap(), "true"); // first point of range assert_eq!(result4.unwrap(), "false"); // last point of range let (result1, result2) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT num_live_ranges(agg), num_gaps(agg) FROM agg", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(result1.unwrap(), 4); assert_eq!(result2.unwrap(), 4); let 
(result1, result2) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT agg->num_live_ranges(), agg->num_gaps() FROM agg", None, &[]) .unwrap().first() .get_two::().unwrap(); assert_eq!(result1.unwrap(), 4); assert_eq!(result2.unwrap(), 4); }) } #[pg_test] pub fn test_heartbeat_rollup() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update( "CREATE TABLE heartbeats(time timestamptz, batch timestamptz)", None, &[], ) .unwrap(); client.update( "INSERT INTO heartbeats VALUES ('01-01-2020 3:02:20 UTC'::timestamptz, '01-01-2020 3:00:00 UTC'::timestamptz), ('01-01-2020 3:03:10 UTC'::timestamptz, '01-01-2020 3:00:00 UTC'::timestamptz), ('01-01-2020 3:04:07 UTC'::timestamptz, '01-01-2020 3:00:00 UTC'::timestamptz), ('01-01-2020 7:19:20 UTC'::timestamptz, '01-01-2020 7:00:00 UTC'::timestamptz), ('01-01-2020 7:39:20 UTC'::timestamptz, '01-01-2020 7:00:00 UTC'::timestamptz), ('01-01-2020 7:59:20 UTC'::timestamptz, '01-01-2020 7:00:00 UTC'::timestamptz), ('01-01-2020 8:00:10 UTC'::timestamptz, '01-01-2020 8:00:00 UTC'::timestamptz), ('01-01-2020 8:59:10 UTC'::timestamptz, '01-01-2020 8:00:00 UTC'::timestamptz), ('01-01-2020 23:34:20 UTC'::timestamptz, '01-01-2020 23:00:00 UTC'::timestamptz), ('01-01-2020 23:37:20 UTC'::timestamptz, '01-01-2020 23:00:00 UTC'::timestamptz), ('01-01-2020 23:38:05 UTC'::timestamptz, '01-01-2020 23:00:00 UTC'::timestamptz), ('01-01-2020 23:39:00 UTC'::timestamptz, '01-01-2020 23:00:00 UTC'::timestamptz)", None, &[], ).unwrap(); let result = client .update( "WITH aggs AS ( SELECT heartbeat_agg(time, batch, '1h', '1m') FROM heartbeats GROUP BY batch ) SELECT rollup(heartbeat_agg)::TEXT FROM aggs", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); 
assert_eq!("(version:1,start_time:631162800000000,end_time:631238400000000,last_seen:631237140000000,interval_len:60000000,num_intervals:7,interval_starts:[631162940000000,631178360000000,631179560000000,631180760000000,631184350000000,631236860000000,631237040000000],interval_ends:[631163107000000,631178420000000,631179620000000,631180870000000,631184410000000,631236920000000,631237200000000])", result); }) } #[pg_test] pub fn test_heartbeat_combining_rollup() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update("CREATE TABLE aggs(agg heartbeatagg)", None, &[]) .unwrap(); client .update( "INSERT INTO aggs SELECT heartbeat_agg(hb, '01-01-2020 UTC', '1h', '10m') FROM (VALUES ('01-01-2020 0:2:20 UTC'::timestamptz), ('01-01-2020 0:10 UTC'::timestamptz), ('01-01-2020 0:17 UTC'::timestamptz), ('01-01-2020 0:30 UTC'::timestamptz), ('01-01-2020 0:35 UTC'::timestamptz), ('01-01-2020 0:40 UTC'::timestamptz), ('01-01-2020 0:50:30 UTC'::timestamptz) ) AS _(hb)", None, &[], ) .unwrap(); client .update( "INSERT INTO aggs SELECT heartbeat_agg(hb, '01-01-2020 0:30 UTC', '1h', '10m') FROM (VALUES ('01-01-2020 0:35 UTC'::timestamptz), ('01-01-2020 0:40 UTC'::timestamptz), ('01-01-2020 0:40 UTC'::timestamptz), ('01-01-2020 1:08 UTC'::timestamptz), ('01-01-2020 1:18 UTC'::timestamptz) ) AS _(hb)", None, &[], ) .unwrap(); client .update( "INSERT INTO aggs SELECT heartbeat_agg(hb, '01-01-2020 1:00 UTC', '1h', '10m') FROM (VALUES ('01-01-2020 1:00 UTC'::timestamptz), ('01-01-2020 1:28 UTC'::timestamptz), ('01-01-2020 1:38:01 UTC'::timestamptz), ('01-01-2020 1:40 UTC'::timestamptz), ('01-01-2020 1:40:01 UTC'::timestamptz), ('01-01-2020 1:50:01 UTC'::timestamptz), ('01-01-2020 1:57 UTC'::timestamptz), ('01-01-2020 1:59:50 UTC'::timestamptz) ) AS _(hb)", None, &[], ) .unwrap(); let mut result = client .update( "SELECT dead_ranges(rollup(agg))::TEXT FROM aggs", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() 
.unwrap() .unwrap(), "(\"2020-01-01 00:00:00+00\",\"2020-01-01 00:02:20+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:27:00+00\",\"2020-01-01 00:30:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:50:00+00\",\"2020-01-01 00:50:30+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:38:00+00\",\"2020-01-01 01:38:01+00\")" ); assert!(result.next().is_none()); }); } #[pg_test] pub fn test_heartbeat_trim_to() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update("CREATE TABLE liveness(heartbeat TIMESTAMPTZ)", None, &[]) .unwrap(); client .update( "INSERT INTO liveness VALUES ('01-01-2020 0:2:20 UTC'), ('01-01-2020 0:10 UTC'), ('01-01-2020 0:17 UTC'), ('01-01-2020 0:30 UTC'), ('01-01-2020 0:35 UTC'), ('01-01-2020 0:40 UTC'), ('01-01-2020 0:35 UTC'), ('01-01-2020 0:40 UTC'), ('01-01-2020 0:40 UTC'), ('01-01-2020 0:50:30 UTC'), ('01-01-2020 1:00 UTC'), ('01-01-2020 1:08 UTC'), ('01-01-2020 1:18 UTC'), ('01-01-2020 1:28 UTC'), ('01-01-2020 1:38:01 UTC'), ('01-01-2020 1:40 UTC'), ('01-01-2020 1:40:01 UTC'), ('01-01-2020 1:50:01 UTC'), ('01-01-2020 1:57 UTC'), ('01-01-2020 1:59:50 UTC') ", None, &[], ) .unwrap(); let (result1, result2, result3) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness), trimmed AS (SELECT trim_to(agg, '01-01-2020 0:30 UTC', '1h') AS agg FROM agg) SELECT uptime(agg)::TEXT, num_gaps(agg), live_at(agg, '01-01-2020 0:50:25 UTC')::TEXT FROM trimmed", None, &[]) .unwrap().first() .get_three::().unwrap(); assert_eq!(result1.unwrap(), "00:59:30"); assert_eq!(result2.unwrap(), 1); assert_eq!(result3.unwrap(), "false"); let (result1, result2, result3) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness), trimmed AS (SELECT trim_to(agg, 
duration=>'30m') AS agg FROM agg) SELECT uptime(agg)::TEXT, num_gaps(agg), live_at(agg, '01-01-2020 0:20:25 UTC')::TEXT FROM trimmed", None, &[]) .unwrap().first() .get_three::().unwrap(); assert_eq!(result1.unwrap(), "00:24:40"); assert_eq!(result2.unwrap(), 2); assert_eq!(result3.unwrap(), "true"); let (result1, result2, result3) = client.update( "WITH agg AS (SELECT heartbeat_agg(heartbeat, '01-01-2020 UTC', '2h', '10m') AS agg FROM liveness) SELECT agg -> trim_to('01-01-2020 1:40:00 UTC'::timestamptz) -> num_gaps(), (agg -> trim_to('01-01-2020 00:50:00 UTC'::timestamptz, '30s') -> uptime())::TEXT, agg -> trim_to('01-01-2020 00:28:00 UTC'::timestamptz, '22m15s') -> num_live_ranges() FROM agg", None, &[]) .unwrap().first() .get_three::().unwrap(); assert_eq!(result1.unwrap(), 0); assert_eq!(result2.unwrap(), "00:00:00"); assert_eq!(result3.unwrap(), 1); }); } #[pg_test] pub fn test_heartbeat_agg_interpolation() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update( "CREATE TABLE liveness(heartbeat TIMESTAMPTZ, start TIMESTAMPTZ)", None, &[], ) .unwrap(); client .update( "INSERT INTO liveness VALUES ('01-01-2020 0:2:20 UTC', '01-01-2020 0:0 UTC'), ('01-01-2020 0:10 UTC', '01-01-2020 0:0 UTC'), ('01-01-2020 0:17 UTC', '01-01-2020 0:0 UTC'), ('01-01-2020 0:30 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:35 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:40 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:35 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:40 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:40 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:50:30 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 1:00:30 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:08 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:18 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:28 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:38:01 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:40 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:40:01 UTC', '01-01-2020 1:30 UTC'), 
('01-01-2020 1:50:01 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:57 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:59:50 UTC', '01-01-2020 1:30 UTC') ", None, &[], ) .unwrap(); let mut result = client .update( "WITH s AS ( SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start), t AS ( SELECT start, interpolate(agg, LAG (agg) OVER (ORDER BY start)) AS agg FROM s) SELECT downtime(agg)::TEXT FROM t;", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:05:20" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:30" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:00" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:01" ); assert!(result.next().is_none()); let mut result = client .update( "WITH s AS ( SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start), t AS ( SELECT start, interpolate(agg, LAG (agg) OVER (ORDER BY start)) AS agg FROM s) SELECT live_ranges(agg)::TEXT FROM t;", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:02:20+00\",\"2020-01-01 00:27:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:30:00+00\",\"2020-01-01 00:50:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:50:30+00\",\"2020-01-01 01:00:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:00:00+00\",\"2020-01-01 01:30:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:30:00+00\",\"2020-01-01 01:38:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:38:01+00\",\"2020-01-01 02:00:00+00\")" ); assert!(result.next().is_none()); let mut result = client .update( "WITH s AS ( 
SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start), t AS ( SELECT start, agg -> interpolate(LAG (agg) OVER (ORDER BY start)) AS agg FROM s) SELECT live_ranges(agg)::TEXT FROM t;", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:02:20+00\",\"2020-01-01 00:27:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:30:00+00\",\"2020-01-01 00:50:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 00:50:30+00\",\"2020-01-01 01:00:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:00:00+00\",\"2020-01-01 01:30:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:30:00+00\",\"2020-01-01 01:38:00+00\")" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "(\"2020-01-01 01:38:01+00\",\"2020-01-01 02:00:00+00\")" ); assert!(result.next().is_none()); let mut result = client .update( "WITH s AS ( SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start) SELECT interpolated_uptime(agg, LAG (agg) OVER (ORDER BY start))::TEXT FROM s", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:24:40" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:29:30" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:30:00" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:29:59" ); assert!(result.next().is_none()); let mut result = client .update( "WITH s AS ( SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start) SELECT (agg -> interpolated_uptime(LAG (agg) OVER (ORDER BY start)))::TEXT FROM s", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() 
.unwrap(), "00:24:40" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:29:30" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:30:00" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:29:59" ); assert!(result.next().is_none()); let mut result = client .update( "WITH s AS ( SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start) SELECT interpolated_downtime(agg, LAG (agg) OVER (ORDER BY start))::TEXT FROM s", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:05:20" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:30" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:00" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:01" ); assert!(result.next().is_none()); let mut result = client .update( "WITH s AS ( SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start) SELECT (agg -> interpolated_downtime(LAG (agg) OVER (ORDER BY start)))::TEXT FROM s", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:05:20" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:30" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:00" ); assert_eq!( result.next().unwrap()[1] .value::() .unwrap() .unwrap(), "00:00:01" ); assert!(result.next().is_none()); }) } #[pg_test] fn test_heartbeat_agg_text_io() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update("CREATE TABLE liveness(heartbeat TIMESTAMPTZ)", None, &[]) .unwrap(); client .update( "INSERT INTO liveness VALUES ('01-01-2020 0:2:20 UTC'), ('01-01-2020 0:10 UTC'), ('01-01-2020 0:17 UTC') ", None, &[], ) .unwrap(); let output = client .update( "SELECT heartbeat_agg(heartbeat, '01-01-2020', 
'30m', '5m')::TEXT FROM liveness;", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let expected = "(version:1,start_time:631152000000000,end_time:631153800000000,last_seen:631153020000000,interval_len:300000000,num_intervals:3,interval_starts:[631152140000000,631152600000000,631153020000000],interval_ends:[631152440000000,631152900000000,631153320000000])"; assert_eq!(output, Some(expected.into())); let estimate = client .update( &format!("SELECT uptime('{expected}'::heartbeatagg)::TEXT"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(estimate.unwrap().as_str(), "00:15:00"); }); } #[pg_test] fn test_heartbeat_agg_byte_io() { use std::ptr; // Create a heartbeat agg from 0 to 250 with intervals from 40-50, 60-85, and 100-110 let state = heartbeat_trans_inner( None, 40.into(), 0.into(), 250.into(), 10.into(), ptr::null_mut(), ); let state = heartbeat_trans_inner( state, 60.into(), 0.into(), 250.into(), 10.into(), ptr::null_mut(), ); let state = heartbeat_trans_inner( state, 65.into(), 0.into(), 250.into(), 10.into(), ptr::null_mut(), ); let state = heartbeat_trans_inner( state, 75.into(), 0.into(), 250.into(), 10.into(), ptr::null_mut(), ); let state = heartbeat_trans_inner( state, 100.into(), 0.into(), 250.into(), 10.into(), ptr::null_mut(), ); let agg = heartbeat_final_inner(state, ptr::null_mut()) .expect("failed to build finalized heartbeat_agg"); let serial = agg.to_pg_bytes(); let expected = [ 128, 1, 0, 0, // header 1, // version 0, 0, 0, // padding 0, 0, 0, 0, 0, 0, 0, 0, // start_time 250, 0, 0, 0, 0, 0, 0, 0, // end_time 100, 0, 0, 0, 0, 0, 0, 0, // last_seen 10, 0, 0, 0, 0, 0, 0, 0, // interval_len 3, 0, 0, 0, 0, 0, 0, 0, // num_intervals 40, 0, 0, 0, 0, 0, 0, 0, // interval_starts[0] 60, 0, 0, 0, 0, 0, 0, 0, // interval_starts[1] 100, 0, 0, 0, 0, 0, 0, 0, // interval_starts[2] 50, 0, 0, 0, 0, 0, 0, 0, // interval_ends[0] 85, 0, 0, 0, 0, 0, 0, 0, // interval_ends[1] 110, 0, 0, 0, 0, 0, 0, 0, // interval_ends[2] ]; 
assert_eq!(serial, expected); } #[pg_test] fn test_rollup_overlap() { Spi::connect_mut(|client| { client.update("SET TIMEZONE to UTC", None, &[]).unwrap(); client .update( "CREATE TABLE poc(ts TIMESTAMPTZ, batch TIMESTAMPTZ)", None, &[], ) .unwrap(); client .update( "INSERT INTO poc VALUES ('1-1-2020 0:50 UTC', '1-1-2020 0:00 UTC'), ('1-1-2020 1:10 UTC', '1-1-2020 0:00 UTC'), ('1-1-2020 1:00 UTC', '1-1-2020 1:00 UTC')", None, &[], ) .unwrap(); let output = client .update( "WITH rollups AS ( SELECT heartbeat_agg(ts, batch, '2h', '20m') FROM poc GROUP BY batch ORDER BY batch ) SELECT live_ranges(rollup(heartbeat_agg))::TEXT FROM rollups", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let expected = "(\"2020-01-01 00:50:00+00\",\"2020-01-01 01:30:00+00\")"; assert_eq!(output, Some(expected.into())); }); } } ================================================ FILE: extension/src/hyperloglog.rs ================================================ #![allow(clippy::identity_op)] // clippy gets confused by flat_serialize! 
enums use std::{ convert::TryInto, hash::{Hash, Hasher}, }; use serde::{Deserialize, Serialize}; use pg_sys::{Datum, Oid}; use pgrx::*; use crate::{ accessors::{AccessorDistinctCount, AccessorStderror}, aggregate_utils::{get_collation, in_aggregate_context}, datum_utils::DatumHashBuilder, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, ron_inout_funcs, serialization::{PgCollationId, ShortTypeId}, }; use hyperloglogplusplus::{HyperLogLog as HLL, HyperLogLogStorage}; // pgrx doesn't implement Eq/Hash but it's okay here since we treat Datums as raw bytes #[derive(Debug, Copy, Clone, PartialEq)] struct HashableDatum(Datum); impl Eq for HashableDatum {} #[allow(clippy::derived_hash_with_manual_eq)] // partialeq and hash implementations match impl Hash for HashableDatum { fn hash(&self, state: &mut H) { self.0.value().hash(state) } } #[derive(Serialize, Deserialize, Clone, PartialEq, Eq)] pub struct HyperLogLogTrans { logger: HLL<'static, HashableDatum, DatumHashBuilder>, } use crate::raw::AnyElement; #[pg_extern(immutable, parallel_safe)] pub fn hyperloglog_trans( state: Internal, size: i32, // TODO we want to use crate::raw::AnyElement but it doesn't work for some reason... value: Option, fc: pg_sys::FunctionCallInfo, ) -> Option { // let state: Internal = Internal::from_polymorphic_datum(); hyperloglog_trans_inner(unsafe { state.to_inner() }, size, value, fc, unsafe { pgrx::pg_getarg_type(fc, 2) }) .internal() } const APPROX_COUNT_DISTINCT_DEFAULT_SIZE: i32 = 32768; /// Similar to hyperloglog_trans(), except size is set to a default of 32,768 #[pg_extern(immutable, parallel_safe)] pub fn approx_count_distinct_trans( state: Internal, // TODO we want to use crate::raw::AnyElement but it doesn't work for some reason... 
value: Option, fc: pg_sys::FunctionCallInfo, ) -> Option { // let state: Internal = Internal::from_polymorphic_datum(); hyperloglog_trans_inner( unsafe { state.to_inner() }, APPROX_COUNT_DISTINCT_DEFAULT_SIZE, value, fc, unsafe { pgrx::pg_getarg_type(fc, 1) }, ) .internal() } pub fn hyperloglog_trans_inner( state: Option>, size: i32, value: Option, fc: pg_sys::FunctionCallInfo, arg_type: pg_sys::Oid, ) -> Option> { unsafe { in_aggregate_context(fc, || { //TODO is this the right way to handle NULL? let value = match value { None => return state, Some(value) => value.0, }; let mut state = match state { None => { // TODO specialize hash function for bytea types? // ints? floats? uuids? other primitive types? let size: usize = size.try_into().unwrap(); let b = size.checked_next_power_of_two().unwrap().trailing_zeros(); if !(4..=18).contains(&b) { error!( "Invalid value for size {}. \ Size must be between 16 and 262144, \ though less than 1024 not recommended", size ) } let typ = arg_type; let collation = get_collation(fc); let hasher = DatumHashBuilder::from_type_id(typ, collation); let trans = HyperLogLogTrans { logger: HLL::new(b as u8, hasher), }; trans.into() } Some(state) => state, }; state.logger.add(&HashableDatum(value)); Some(state) }) } } #[pg_extern(immutable, parallel_safe)] pub fn hyperloglog_combine( state1: Internal, state2: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { unsafe { hyperloglog_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() } } pub fn hyperloglog_combine_inner( state1: Option>, state2: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state1, state2) { (None, None) => None, (None, Some(state2)) => Some(state2.clone().into()), (Some(state1), None) => Some(state1.clone().into()), (Some(state1), Some(state2)) => { let mut logger = state1.logger.clone(); logger.merge_in(&state2.logger); Some(HyperLogLogTrans { logger }.into()) } }) } } use 
crate::raw::bytea; #[pg_extern(immutable, parallel_safe, strict)] pub fn hyperloglog_serialize(state: Internal) -> bytea { let mut state = state; let state: &mut HyperLogLogTrans = unsafe { state.get_mut().unwrap() }; state.logger.merge_all(); crate::do_serialize!(state) } #[pg_extern(strict, immutable, parallel_safe)] pub fn hyperloglog_deserialize(bytes: bytea, _internal: Internal) -> Option { hyperloglog_deserialize_inner(bytes).internal() } pub fn hyperloglog_deserialize_inner(bytes: bytea) -> Inner { let i: HyperLogLogTrans = crate::do_deserialize!(bytes, HyperLogLogTrans); i.into() } pg_type! { #[derive(Debug)] struct HyperLogLog<'input> { #[flat_serialize::flatten] log: Storage<'input>, } } flat_serialize_macro::flat_serialize! { #[derive(Debug, Serialize, Deserialize)] enum Storage<'a> { storage_kind: u64, Sparse: 1 { num_compressed: u64, // Oids are stored in postgres arrays, so it should be safe to store them // in our types as long as we do send/recv and in/out correctly // see https://github.com/postgres/postgres/blob/b8d0cda53377515ac61357ec4a60e85ca873f486/src/include/utils/array.h#L90 element_type: ShortTypeId, collation: PgCollationId, compressed_bytes: u32, precision: u8, compressed: [u8; self.compressed_bytes], }, Dense: 2 { // Oids are stored in postgres arrays, so it should be safe to store them // in our types as long as we do send/recv and in/out correctly // see https://github.com/postgres/postgres/blob/b8d0cda53377515ac61357ec4a60e85ca873f486/src/include/utils/array.h#L90 element_type: ShortTypeId, collation: PgCollationId, precision: u8, registers: [u8; 1 + (1usize << self.precision) * 6 / 8] //TODO should we just store len? 
}, } } ron_inout_funcs!(HyperLogLog<'input>); #[pg_extern(immutable, parallel_safe)] fn hyperloglog_final( state: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { hyperloglog_final_inner(unsafe { state.to_inner() }, fcinfo) } fn hyperloglog_final_inner( state: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let mut state = match state { None => return None, Some(state) => state, }; flatten_log(&mut state.logger).into() }) } } extension_sql!( "\n\ CREATE AGGREGATE hyperloglog(size integer, value AnyElement)\n\ (\n\ stype = internal,\n\ sfunc = hyperloglog_trans,\n\ finalfunc = hyperloglog_final,\n\ combinefunc = hyperloglog_combine,\n\ serialfunc = hyperloglog_serialize,\n\ deserialfunc = hyperloglog_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "hll_agg", requires = [ hyperloglog_trans, hyperloglog_final, hyperloglog_combine, hyperloglog_serialize, hyperloglog_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE approx_count_distinct(value AnyElement)\n\ (\n\ stype = internal,\n\ sfunc = approx_count_distinct_trans,\n\ finalfunc = hyperloglog_final,\n\ combinefunc = hyperloglog_combine,\n\ serialfunc = hyperloglog_serialize,\n\ deserialfunc = hyperloglog_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "approx_count_distinct_agg", requires = [ approx_count_distinct_trans, hyperloglog_final, hyperloglog_combine, hyperloglog_serialize, hyperloglog_deserialize ], ); #[pg_extern(immutable, parallel_safe)] pub fn hyperloglog_union<'a>( state: Internal, other: Option>, fc: pg_sys::FunctionCallInfo, ) -> Option { hyperloglog_union_inner(unsafe { state.to_inner() }, other, fc).internal() } pub fn hyperloglog_union_inner( state: Option>, other: Option, fc: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fc, || { let other = match other { Some(other) => other, None => { return state; } }; let mut state = match state { Some(state) => state, None => { let state = HyperLogLogTrans { 
logger: unflatten_log(other).into_owned(), }; return Some(state.into()); } }; let other = unflatten_log(other); if state.logger.buildhasher.type_id != other.buildhasher.type_id { error!("mismatched types") } // TODO error on mismatched collation? state.logger.merge_in(&other); Some(state) }) } } extension_sql!( "\n\ CREATE AGGREGATE rollup(hyperloglog Hyperloglog)\n\ (\n\ stype = internal,\n\ sfunc = hyperloglog_union,\n\ finalfunc = hyperloglog_final,\n\ combinefunc = hyperloglog_combine,\n\ serialfunc = hyperloglog_serialize,\n\ deserialfunc = hyperloglog_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "hll_rollup", requires = [ hyperloglog_union, hyperloglog_final, hyperloglog_combine, hyperloglog_serialize, hyperloglog_deserialize ], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_hyperloglog_count<'a>( sketch: HyperLogLog<'a>, _accessor: AccessorDistinctCount, ) -> i64 { hyperloglog_count(sketch) } #[pg_extern(name = "distinct_count", immutable, parallel_safe)] pub fn hyperloglog_count<'a>(hyperloglog: HyperLogLog<'a>) -> i64 { // count does not depend on the type parameters let log = match &hyperloglog.log { Storage::Sparse { num_compressed, precision, compressed, .. } => HLL::::from_sparse_parts( compressed.slice(), *num_compressed, *precision, (), ), Storage::Dense { precision, registers, .. } => HLL::::from_dense_parts(registers.slice(), *precision, ()), }; log.immutable_estimate_count() as i64 } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_hyperloglog_error<'a>(sketch: HyperLogLog<'a>, _accessor: AccessorStderror) -> f64 { hyperloglog_error(sketch) } #[pg_extern(name = "stderror", immutable, parallel_safe)] pub fn hyperloglog_error<'a>(hyperloglog: HyperLogLog<'a>) -> f64 { let precision = match hyperloglog.log { Storage::Sparse { precision, .. } => precision, Storage::Dense { precision, .. 
} => precision, }; hyperloglogplusplus::error_for_precision(precision) } impl HyperLogLog<'_> { pub fn build_from( size: i32, type_id: Oid, collation: Option, data: impl Iterator, ) -> HyperLogLog<'static> { unsafe { let b = TryInto::::try_into(size) .unwrap() .checked_next_power_of_two() .unwrap() .trailing_zeros(); let hasher = DatumHashBuilder::from_type_id(type_id, collation); let mut logger: HLL = HLL::new(b as u8, hasher); for datum in data { logger.add(&HashableDatum(datum)); } flatten_log(&mut logger) } } } fn flatten_log(hyperloglog: &mut HLL) -> HyperLogLog<'static> { let (element_type, collation) = { let hasher = &hyperloglog.buildhasher; (ShortTypeId(hasher.type_id), PgCollationId(hasher.collation)) }; // we need to flatten the vector to a single buffer that contains // both the size, the data, and the varlen header let flat = match hyperloglog.to_parts() { HyperLogLogStorage::Sparse(sparse) => unsafe { flatten!(HyperLogLog { log: Storage::Sparse { element_type, collation, num_compressed: sparse.num_compressed, precision: sparse.precision, compressed_bytes: sparse.compressed.num_bytes() as u32, compressed: sparse.compressed.bytes().into(), } }) }, HyperLogLogStorage::Dense(dense) => unsafe { // TODO check that precision and length match? 
flatten!(HyperLogLog { log: Storage::Dense { element_type, collation, precision: dense.precision, registers: dense.registers.bytes().into(), } }) }, }; flat } fn unflatten_log(hyperloglog: HyperLogLog) -> HLL { match &hyperloglog.log { Storage::Sparse { num_compressed, precision, compressed, element_type, collation, compressed_bytes: _, } => HLL::::from_sparse_parts( compressed.slice(), *num_compressed, *precision, unsafe { DatumHashBuilder::from_type_id(element_type.0, Some(collation.0)) }, ), Storage::Dense { precision, registers, element_type, collation, } => HLL::::from_dense_parts( registers.slice(), *precision, unsafe { DatumHashBuilder::from_type_id(element_type.0, Some(collation.0)) }, ), } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; use rand::distributions::{Distribution, Uniform}; #[pg_test] fn test_hll_aggregate() { Spi::connect_mut(|client| { let text = client .update( "SELECT \ hyperloglog(32, v::float)::TEXT \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let expected = "(\ version:1,\ log:Dense(\ element_type:FLOAT8,\ collation:None,\ precision:5,\ registers:[\ 20,64,132,12,81,1,8,64,133,4,64,136,4,82,3,12,17,\ 65,24,32,197,16,32,132,255\ ]\ )\ )"; assert_eq!(text.unwrap(), expected); let (count, arrow_count) = client .update( "SELECT \ distinct_count(\ hyperloglog(32, v::float)\ ), \ hyperloglog(32, v::float) -> distinct_count() \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); assert_eq!(count, Some(132)); assert_eq!(count, arrow_count); let count2 = client .update(&format!("SELECT distinct_count('{expected}')"), None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count2, count); }); } #[pg_test] // Should have same results as test_hll_distinct_aggregate running with the same number of buckets fn test_approx_count_distinct_aggregate() { Spi::connect_mut(|client| { let text = client .update( 
"SELECT \ approx_count_distinct(v::float)::TEXT \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let expected = "(\ version:1,\ log:Sparse(\ num_compressed:100,\ element_type:FLOAT8,\ collation:None,\ compressed_bytes:320,\ precision:15,\ compressed:[\ 4,61,17,164,87,15,68,239,255,132,121,35,164,5,74,132,160,\ 109,4,177,61,100,68,200,4,144,32,132,118,9,228,190,94,68,\ 120,56,36,121,213,200,97,65,3,200,108,96,2,72,128,10,2,100,\ 182,161,36,218,115,196,202,145,228,189,224,132,21,63,36,\ 88,116,100,162,122,132,139,97,228,245,19,36,242,15,228,115,\ 65,164,114,2,8,224,32,2,72,157,130,2,68,232,93,136,105,1,2,\ 132,16,59,4,34,46,8,244,104,2,226,240,8,82,159,2,200,225,49,\ 2,132,96,9,4,222,195,164,54,22,228,201,59,164,168,27,100,32,\ 58,8,76,32,2,36,56,17,136,18,143,4,132,162,156,196,178,22,\ 132,119,72,228,213,48,4,26,63,68,28,156,36,151,75,36,19,202,\ 164,152,111,164,177,240,98,27,196,254,46,8,138,82,6,164,53,38,\ 36,125,151,8,167,213,3,4,167,248,68,183,61,36,149,32,164,112,\ 121,164,14,139,100,56,166,164,24,48,8,33,90,2,132,115,89,72,\ 100,112,5,196,221,128,228,245,33,4,216,92,8,33,195,6,100,8,54,\ 200,74,2,5,200,101,158,3,228,106,110,72,151,98,2,228,38,26,196,\ 143,15,36,122,57,200,191,43,2,164,225,186,196,219,46,36,26,146,\ 228,129,128,136,6,183,2,4,238,106,200,48,168,2,164,14,13,68,55,\ 196,132,208,90,164,50,130,68,58,137,196,3,88,196,71,31\ ]\ )\ )"; assert_eq!(text.unwrap(), expected); let (count, arrow_count) = client .update( "SELECT \ distinct_count(\ approx_count_distinct(v::float)\ ), \ approx_count_distinct(v::float) -> distinct_count() \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); assert_eq!(count, Some(100)); assert_eq!(count, arrow_count); let count2 = client .update(&format!("SELECT distinct_count('{expected}')"), None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count2, count); }); } #[pg_test] fn test_hll_byte_io() { unsafe { // Unable to build the 
hyperloglog through hyperloglog_trans, as that requires a valid fcinfo to determine OIDs. let hasher = DatumHashBuilder::from_type_id( pg_sys::TEXTOID, Some(crate::serialization::collations::DEFAULT_COLLATION_OID), ); let mut control = HyperLogLogTrans { logger: HLL::new(6, hasher), }; control.logger.add(&HashableDatum( rust_str_to_text_p("first").into_datum().unwrap(), )); control.logger.add(&HashableDatum( rust_str_to_text_p("second").into_datum().unwrap(), )); control.logger.add(&HashableDatum( rust_str_to_text_p("first").into_datum().unwrap(), )); control.logger.add(&HashableDatum( rust_str_to_text_p("second").into_datum().unwrap(), )); control.logger.add(&HashableDatum( rust_str_to_text_p("third").into_datum().unwrap(), )); let buffer = hyperloglog_serialize(Inner::from(control.clone()).internal().unwrap()); let buffer = pgrx::varlena::varlena_to_byte_slice(buffer.0.cast_mut_ptr()); let mut expected = vec![ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 136, 136, 9, 7, 8, 74, 76, 47, 200, 231, 53, 25, 3, 0, 0, 0, 0, 0, 0, 0, 6, 9, 0, 0, 0, 1, ]; bincode::serialize_into( &mut expected, &PgCollationId(crate::serialization::collations::DEFAULT_COLLATION_OID), ) .unwrap(); assert_eq!(buffer, expected); let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected); let new_state = hyperloglog_deserialize_inner(bytea(pg_sys::Datum::from(expected.as_ptr()))); control.logger.merge_all(); // Sparse representation buffers always merged on serialization assert!(*new_state == control); // Now generate a dense represenataion and validate that for i in 0..500 { control.logger.add(&HashableDatum( rust_str_to_text_p(&i.to_string()).into_datum().unwrap(), )); } let buffer = hyperloglog_serialize(Inner::from(control.clone()).internal().unwrap()); let buffer = pgrx::varlena::varlena_to_byte_slice(buffer.0.cast_mut_ptr()); let mut expected = vec![ 1, 1, 1, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 20, 65, 2, 12, 48, 199, 20, 33, 4, 12, 49, 67, 16, 81, 66, 32, 145, 
131, 24, 49, 4, 20, 33, 5, 8, 81, 66, 12, 81, 4, 8, 49, 2, 8, 65, 131, 24, 32, 133, 12, 50, 66, 12, 48, 197, 12, 81, 130, 255, 58, 6, 255, 255, 255, 255, 255, 255, 255, 3, 9, 0, 0, 0, 1, ]; bincode::serialize_into( &mut expected, &PgCollationId(crate::serialization::collations::DEFAULT_COLLATION_OID), ) .unwrap(); assert_eq!(buffer, expected); let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected); let new_state = hyperloglog_deserialize_inner(bytea(pg_sys::Datum::from(expected.as_ptr()))); assert!(*new_state == control); } } #[pg_test] fn test_hll_aggregate_int() { Spi::connect_mut(|client| { let text = client .update( "SELECT hyperloglog(32, v::int)::TEXT FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let expected = "(\ version:1,\ log:Dense(\ element_type:INT4,\ collation:None,\ precision:5,\ registers:[\ 8,49,0,12,32,129,24,32,195,16,33,2,12,1,68,4,16,\ 196,20,64,133,8,17,67,255\ ]\ )\ )"; assert_eq!(text.unwrap(), expected); let count = client .update( "SELECT \ distinct_count(\ hyperloglog(32, v::int)\ ) FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count, Some(96)); let count2 = client .update(&format!("SELECT distinct_count('{expected}')"), None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count2, count); }); } #[pg_test] fn test_hll_aggregate_text() { Spi::connect_mut(|client| { use crate::serialization::PgCollationId; let text = client .update( "SELECT \ hyperloglog(32, v::text)::TEXT \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let default_collation = ron::to_string(&PgCollationId( crate::serialization::collations::DEFAULT_COLLATION_OID, )) .unwrap(); let expected = format!( "(\ version:1,\ log:Dense(\ element_type:TEXT,\ collation:{default_collation},\ precision:5,\ registers:[\ 12,33,3,8,33,4,20,50,3,12,32,133,4,32,67,8,48,\ 128,8,33,4,8,32,197,255\ ]\ )\ )" ); assert_eq!(text.unwrap(), 
expected); let count = client .update( "SELECT distinct_count(\ hyperloglog(32, v::text)\ ) FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count, Some(111)); let count2 = client .update(&format!("SELECT distinct_count('{expected}')"), None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count2, count); }); } #[pg_test] fn test_hll_union_text() { Spi::connect_mut(|client| { { // self-union should be a nop let expected = client .update( "SELECT \ hyperloglog(32, v::text)::TEXT \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); let text = client .update( "SELECT rollup(logs)::text \ FROM (\ (SELECT hyperloglog(32, v::text) logs \ FROM generate_series(1, 100) v\ ) UNION ALL \ (SELECT hyperloglog(32, v::text) \ FROM generate_series(1, 100) v)\ ) q", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(text.unwrap(), expected); } { // differing unions should be a sum of the distinct counts let query = "SELECT distinct_count(rollup(logs)) \ FROM (\ (SELECT hyperloglog(32, v::text) logs \ FROM generate_series(1, 100) v) \ UNION ALL \ (SELECT hyperloglog(32, v::text) \ FROM generate_series(50, 150) v)\ ) q"; let count = client .update(query, None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(count, Some(153)); } }); } #[pg_test] fn test_hll_null_input_yields_null_output() { Spi::connect_mut(|client| { let output = client .update("SELECT hyperloglog(32, null::int)::TEXT", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(output, None) }) } #[pg_test( error = "Invalid value for size 2. 
Size must be between 16 and 262144, though less than 1024 not recommended" )] fn test_hll_error_too_small() { Spi::connect_mut(|client| { let output = client .update("SELECT hyperloglog(2, 'foo'::text)::TEXT", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(output, None) }) } #[pg_test] fn test_hll_size_min() { Spi::connect_mut(|client| { let output = client .update("SELECT hyperloglog(16, 'foo'::text)::TEXT", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert!(output.is_some()) }) } #[pg_test] fn test_hll_size_max() { Spi::connect_mut(|client| { let output = client .update("SELECT hyperloglog(262144, 'foo'::text)::TEXT", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert!(output.is_some()) }) } #[pg_test] fn stderror_arrow_match() { Spi::connect_mut(|client| { let (count, arrow_count) = client .update( "SELECT \ stderror(\ hyperloglog(32, v::float)\ ), \ hyperloglog(32, v::float) -> stderror() \ FROM generate_series(1, 100) v", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); assert_eq!(Some(0.18384776310850234), count); assert_eq!(count, arrow_count); }); } #[pg_test] fn bias_correct_values_accurate() { const NUM_BIAS_TRIALS: usize = 5; const MAX_TRIAL_ERROR: f64 = 0.05; Spi::connect_mut(|client| { // This should match THRESHOLD_DATA_VEC from b=12-18 let thresholds = [3100, 6500, 11500, 20000, 50000, 120000, 350000]; let rand_precision: Uniform = Uniform::new_inclusive(12, 18); let mut rng = rand::thread_rng(); for _ in 0..NUM_BIAS_TRIALS { let precision = rand_precision.sample(&mut rng); let rand_cardinality: Uniform = Uniform::new_inclusive(thresholds[precision - 12], 5 * (1 << precision)); let cardinality = rand_cardinality.sample(&mut rng); let query = format!( "SELECT hyperloglog({}, v) -> distinct_count() FROM generate_series(1, {}) v", 1 << precision, cardinality ); let estimate = client .update(&query, None, &[]) .unwrap() .first() .get_one::() .unwrap() .unwrap(); let error = (estimate as f64 / cardinality as 
f64).abs() - 1.; assert!(error < MAX_TRIAL_ERROR, "hyperloglog with {} buckets on cardinality {} gave a result of {}. Resulting error {} exceeds max allowed ({})", 2^precision, cardinality, estimate, error, MAX_TRIAL_ERROR); } }); } #[pg_test( error = "Invalid value for size 262145. Size must be between 16 and 262144, though less than 1024 not recommended" )] fn test_hll_error_too_large() { Spi::connect_mut(|client| { let output = client .update("SELECT hyperloglog(262145, 'foo'::text)::TEXT", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(output, None) }) } #[pg_test] fn test_hll_null_rollup() { Spi::connect_mut(|client| { let output1 = client .update( "SELECT distinct_count(rollup(logs)) FROM ( (SELECT hyperloglog(16, v::text) logs FROM generate_series(1, 5) v) UNION ALL (SELECT hyperloglog(16, v::text) FROM generate_series(6, 10) v WHERE v <=5) ) hll;", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let output2 = client .update( "SELECT distinct_count(rollup(logs)) FROM ( (SELECT hyperloglog(16, v::text) logs FROM generate_series(1, 5) v) ) hll;", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(output1, output2); }) } //TODO test continuous aggregates } ================================================ FILE: extension/src/lib.rs ================================================ // so we can support upgrading pgrx #![allow(unexpected_cfgs)] // so we can allow very new Clippy lints #![allow(unknown_lints)] // flat_serialize! alignment checks hit this for any single byte field (of which all pg_types! 
have two by default) #![allow(clippy::modulo_one)] // some disagreement between clippy and the rust compiler about when lifetime are and are not needed #![allow(clippy::extra_unused_lifetimes)] // every function calling in_aggregate_context should be unsafe #![allow(clippy::not_unsafe_ptr_arg_deref)] // since 0.5 pgrx requires non-elided lifetimes on extern functions: https://github.com/tcdi/pgrx/issues/721 #![allow(clippy::needless_lifetimes)] // triggered by pg_extern macros #![allow(clippy::useless_conversion)] // caused by pgrx #![allow(clippy::unnecessary_lazy_evaluations)] // clippy triggers an internal complier error checking this #![allow(clippy::unnecessary_literal_unwrap)] pub mod accessors; pub mod asap; pub mod candlestick; pub mod counter_agg; pub mod countminsketch; pub mod frequency; pub mod gauge_agg; pub mod heartbeat_agg; pub mod hyperloglog; pub mod lttb; pub mod nmost; pub mod range; pub mod saturation; pub(crate) mod serialization; pub mod state_aggregate; pub mod stats_agg; pub mod tdigest; pub mod time_vector; pub mod time_weighted_average; pub mod uddsketch; pub mod utilities; mod aggregate_utils; mod datum_utils; mod duration; mod palloc; mod pg_any_element; mod raw; mod stabilization_info; mod stabilization_tests; #[macro_use] mod type_builder; #[cfg(any(test, feature = "pg_test"))] mod aggregate_builder_tests; use pgrx::*; pgrx::pg_module_magic!(); #[pg_guard] pub extern "C-unwind" fn _PG_init() { // Nothing to do here } extension_sql!( r#"GRANT USAGE ON SCHEMA toolkit_experimental TO PUBLIC;"#, name = "final_grant", finalize, ); #[cfg(test)] pub mod pg_test { pub fn setup(_options: Vec<&str>) { // perform one-off initialization when the pg_test framework starts } pub fn postgresql_conf_options() -> Vec<&'static str> { // return any postgresql.conf settings that are required for your tests vec![] } } ================================================ FILE: extension/src/lttb.rs ================================================ use pgrx::*; 
use std::borrow::Cow; use crate::{ aggregate_utils::in_aggregate_context, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, time_vector, }; use tspoint::TSPoint; use crate::time_vector::{Timevector_TSTZ_F64, Timevector_TSTZ_F64Data}; pub struct LttbTrans { series: Vec, resolution: usize, gap_interval: i64, } #[pg_extern(immutable, parallel_safe)] pub fn lttb_trans( state: Internal, time: crate::raw::TimestampTz, val: Option, resolution: i32, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { lttb_trans_inner(unsafe { state.to_inner() }, time, val, resolution, fcinfo).internal() } pub fn lttb_trans_inner( state: Option>, time: crate::raw::TimestampTz, val: Option, resolution: i32, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let val = match val { None => return state, Some(val) => val, }; let mut state = match state { Some(state) => state, None => { if resolution <= 2 { error!("resolution must be greater than 2") } LttbTrans { series: vec![], resolution: resolution as usize, gap_interval: 0, } .into() } }; state.series.push(TSPoint { ts: time.into(), val, }); Some(state) }) } } #[pg_extern(immutable, parallel_safe)] pub fn lttb_final( state: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { lttb_final_inner(unsafe { state.to_inner() }, fcinfo) } pub fn lttb_final_inner( state: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let mut state = match state { None => return None, Some(state) => state, }; state.series.sort_by_key(|point| point.ts); let downsampled = lttb(&state.series[..], state.resolution); flatten!(Timevector_TSTZ_F64 { num_points: downsampled.len() as u32, flags: time_vector::FLAG_IS_SORTED, internal_padding: [0; 3], points: (&*downsampled).into(), null_val: std::vec::from_elem(0_u8, downsampled.len().div_ceil(8)).into() }) .into() }) } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn gp_lttb_trans( 
state: Internal, time: crate::raw::TimestampTz, val: Option, gap: crate::raw::Interval, resolution: i32, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { let state = unsafe { state.to_inner() }; let needs_interval = state.is_none(); // Don't love this code, but need to compute gap_val if needed before time is moved let gap_val = if needs_interval { crate::datum_utils::interval_to_ms(&time, &gap) } else { 0 }; let mut trans = lttb_trans_inner(state, time, val, resolution, fcinfo); if needs_interval { #[allow(clippy::manual_inspect)] // need to mutate s trans.as_mut().map(|s| { s.gap_interval = gap_val; s }); } trans.internal() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn gp_lttb_final( state: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { gap_preserving_lttb_final_inner(unsafe { state.to_inner() }, fcinfo) } pub fn gap_preserving_lttb_final_inner( state: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let mut state = match state { None => return None, Some(state) => state, }; state.series.sort_by_key(|point| point.ts); let count = state.series.len(); let max_gap = if state.gap_interval > 0 { state.gap_interval } else { let range = state.series[count - 1].ts - state.series[0].ts; range / state.resolution as i64 }; // Tracking endpoints remaining will keep us from assigning too many points // to early LTTB computations when there are lots of gaps later in the timeseries let mut endpoints_remaining = 2; let mut start = 0; for i in 0..count - 1 { if state.series[i + 1].ts - state.series[i].ts > max_gap { if i == start { endpoints_remaining += 1; } else { endpoints_remaining += 2; } start = i + 1; } } let mut points_remaining = state.resolution as i64; let mut downsampled = vec![]; start = 0; for i in 0..count - 1 { if state.series[i + 1].ts - state.series[i].ts > max_gap { if i == start { // 1 len subarray downsampled.push(state.series[i]); start = i + 1; points_remaining 
-= 1; endpoints_remaining -= 1; } else { let sgmt_pct_of_remaining_pts = (i - start - 1) as f64 / (count - start - endpoints_remaining) as f64; let pts_for_sgmt = std::cmp::max( ((points_remaining - endpoints_remaining as i64) as f64 * sgmt_pct_of_remaining_pts) as usize, 0, ) + 2; downsampled .append(&mut lttb(&state.series[start..=i], pts_for_sgmt).into_owned()); start = i + 1; points_remaining -= pts_for_sgmt as i64; endpoints_remaining -= 2; } } } // remainder if start == count - 1 { downsampled.push(state.series[count - 1]); } else { downsampled.append( &mut lttb( &state.series[start..count], std::cmp::max(points_remaining, 2) as usize, ) .into_owned(), ); } flatten!(Timevector_TSTZ_F64 { num_points: downsampled.len() as u32, flags: time_vector::FLAG_IS_SORTED, internal_padding: [0; 3], null_val: std::vec::from_elem(0_u8, downsampled.len().div_ceil(8)).into(), points: downsampled.into(), }) .into() }) } } extension_sql!( "\n\ CREATE AGGREGATE lttb(ts TIMESTAMPTZ, value DOUBLE PRECISION, resolution integer) (\n\ sfunc = lttb_trans,\n\ stype = internal,\n\ finalfunc = lttb_final\n\ );\n\ ", name = "lttb_agg", requires = [lttb_trans, lttb_final], ); extension_sql!("\n\ CREATE AGGREGATE toolkit_experimental.gp_lttb(ts TIMESTAMPTZ, value DOUBLE PRECISION, resolution integer) (\n\ sfunc = lttb_trans,\n\ stype = internal,\n\ finalfunc = toolkit_experimental.gp_lttb_final\n\ );\n\ ", name = "gp_lttb_agg", requires = [lttb_trans, gp_lttb_final], ); extension_sql!("\n\ CREATE AGGREGATE toolkit_experimental.gp_lttb(ts TIMESTAMPTZ, value DOUBLE PRECISION, gapsize INTERVAL, resolution integer) (\n\ sfunc = toolkit_experimental.gp_lttb_trans,\n\ stype = internal,\n\ finalfunc = toolkit_experimental.gp_lttb_final\n\ );\n\ ", name = "gp_lttb_agg_with_size", requires = [gp_lttb_trans, gp_lttb_final], ); // based on https://github.com/jeromefroe/lttb-rs version 0.2.0 pub fn lttb(data: &[TSPoint], threshold: usize) -> Cow<'_, [TSPoint]> { if threshold >= data.len() || threshold 
== 0 { // Nothing to do. return Cow::Borrowed(data); } let mut sampled = Vec::with_capacity(threshold); // Bucket size. Leave room for start and end data points. let every = ((data.len() - 2) as f64) / ((threshold - 2) as f64); // Initially a is the first point in the triangle. let mut a = 0; // Always add the first point. sampled.push(data[a]); for i in 0..threshold - 2 { // Calculate point average for next bucket (containing c). let mut avg_x = 0i64; let mut avg_y = 0f64; let avg_range_start = (((i + 1) as f64) * every) as usize + 1; let mut end = (((i + 2) as f64) * every) as usize + 1; if end >= data.len() { end = data.len(); } let avg_range_end = end; let avg_range_length = (avg_range_end - avg_range_start) as f64; for i in 0..(avg_range_end - avg_range_start) { let idx = avg_range_start + i; avg_x += data[idx].ts; avg_y += data[idx].val; } avg_x /= avg_range_length as i64; avg_y /= avg_range_length; // Get the range for this bucket. let range_offs = ((i as f64) * every) as usize + 1; let range_to = (((i + 1) as f64) * every) as usize + 1; // Point a. let point_a_x = data[a].ts; let point_a_y = data[a].val; let mut max_area = -1f64; let mut next_a = range_offs; for i in 0..(range_to - range_offs) { let idx = range_offs + i; // Calculate triangle area over three buckets. let area = ((point_a_x - avg_x) as f64 * (data[idx].val - point_a_y) - (point_a_x - data[idx].ts) as f64 * (avg_y - point_a_y)) .abs() * 0.5; if area > max_area { max_area = area; next_a = idx; // Next a is this b. } } sampled.push(data[next_a]); // Pick this point from the bucket. a = next_a; // This a is the next a (chosen b). } // Always add the last point. 
sampled.push(data[data.len() - 1]); Cow::Owned(sampled) } #[pg_extern(name = "lttb", immutable, parallel_safe)] pub fn lttb_on_timevector( series: Timevector_TSTZ_F64<'static>, threshold: i32, ) -> Option> { lttb_ts(series, threshold as usize).into() } // based on https://github.com/jeromefroe/lttb-rs version 0.2.0 pub fn lttb_ts(data: Timevector_TSTZ_F64, threshold: usize) -> Timevector_TSTZ_F64 { if !data.is_sorted() { panic!("lttb requires sorted timevector"); } if threshold >= data.num_points() || threshold == 0 { // Nothing to do. return data.in_current_context(); // can we avoid this copy??? } let mut sampled = Vec::with_capacity(threshold); // Bucket size. Leave room for start and end data points. let every = ((data.num_points() - 2) as f64) / ((threshold - 2) as f64); // Initially a is the first point in the triangle. let mut a = 0; // Always add the first point. sampled.push(data.get(a).unwrap()); for i in 0..threshold - 2 { // Calculate point average for next bucket (containing c). let mut avg_x = 0i64; let mut avg_y = 0f64; let avg_range_start = (((i + 1) as f64) * every) as usize + 1; let mut end = (((i + 2) as f64) * every) as usize + 1; if end >= data.num_points() { end = data.num_points(); } let avg_range_end = end; let avg_range_length = (avg_range_end - avg_range_start) as f64; for i in 0..(avg_range_end - avg_range_start) { let idx = avg_range_start + i; let point = data.get(idx).unwrap(); avg_x += point.ts; avg_y += point.val; } avg_x /= avg_range_length as i64; avg_y /= avg_range_length; // Get the range for this bucket. let range_offs = ((i as f64) * every) as usize + 1; let range_to = (((i + 1) as f64) * every) as usize + 1; // Point a. let point_a_x = data.get(a).unwrap().ts; let point_a_y = data.get(a).unwrap().val; let mut max_area = -1f64; let mut next_a = range_offs; for i in 0..(range_to - range_offs) { let idx = range_offs + i; // Calculate triangle area over three buckets. 
let area = ((point_a_x - avg_x) as f64 * (data.get(idx).unwrap().val - point_a_y) - (point_a_x - data.get(idx).unwrap().ts) as f64 * (avg_y - point_a_y)) .abs() * 0.5; if area > max_area { max_area = area; next_a = idx; // Next a is this b. } } sampled.push(data.get(next_a).unwrap()); // Pick this point from the bucket. a = next_a; // This a is the next a (chosen b). } // Always add the last point. sampled.push(data.get(data.num_points() - 1).unwrap()); let nulls_len = sampled.len().div_ceil(8); crate::build! { Timevector_TSTZ_F64 { num_points: sampled.len() as _, flags: time_vector::FLAG_IS_SORTED, internal_padding: [0; 3], points: sampled.into(), null_val: std::vec::from_elem(0_u8, nulls_len).into(), } } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_lttb_equivalence() { Spi::connect_mut(|client| { client .update( "CREATE TABLE test(time TIMESTAMPTZ, value DOUBLE PRECISION);", None, &[], ) .unwrap(); client.update( "INSERT INTO test SELECT time, value FROM toolkit_experimental.generate_periodic_normal_series('2020-01-01 UTC'::timestamptz, NULL);", None, &[]).unwrap(); client .update( "CREATE TABLE results1(time TIMESTAMPTZ, value DOUBLE PRECISION);", None, &[], ) .unwrap(); client .update( "INSERT INTO results1 SELECT time, value FROM unnest( (SELECT lttb(time, value, 100) FROM test) );", None, &[], ) .unwrap(); client .update( "CREATE TABLE results2(time TIMESTAMPTZ, value DOUBLE PRECISION);", None, &[], ) .unwrap(); client .update( "INSERT INTO results2 SELECT time, value FROM unnest( (SELECT lttb( (SELECT timevector(time, value) FROM test), 100) ) );", None, &[], ) .unwrap(); let delta = client .update("SELECT count(*) FROM results1 r1 FULL OUTER JOIN results2 r2 ON r1 = r2 WHERE r1 IS NULL OR r2 IS NULL;" , None, &[]) .unwrap().first() .get_one::().unwrap(); assert_eq!(delta.unwrap(), 0); }) } #[pg_test] fn test_lttb_result() { Spi::connect_mut(|client| { client.update("SET timezone 
TO 'UTC'", None, &[]).unwrap(); let mut result = client .update( r#"SELECT unnest(lttb(ts, val, 5))::TEXT FROM (VALUES ('2020-1-1'::timestamptz, 10), ('2020-1-2'::timestamptz, 21), ('2020-1-3'::timestamptz, 19), ('2020-1-4'::timestamptz, 32), ('2020-1-5'::timestamptz, 12), ('2020-1-6'::timestamptz, 14), ('2020-1-7'::timestamptz, 18), ('2020-1-8'::timestamptz, 29), ('2020-1-9'::timestamptz, 23), ('2020-1-10'::timestamptz, 27), ('2020-1-11'::timestamptz, 14) ) AS v(ts, val)"#, None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-01 00:00:00+00\",10)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-04 00:00:00+00\",32)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-05 00:00:00+00\",12)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-08 00:00:00+00\",29)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-11 00:00:00+00\",14)") ); assert!(result.next().is_none()); }) } #[pg_test] fn test_gp_lttb() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); let mut result = client .update( r#"SELECT unnest(toolkit_experimental.gp_lttb(ts, val, 7))::TEXT FROM (VALUES ('2020-1-1'::timestamptz, 10), ('2020-1-2'::timestamptz, 21), ('2020-1-3'::timestamptz, 19), ('2020-1-4'::timestamptz, 32), ('2020-1-5'::timestamptz, 12), ('2020-2-6'::timestamptz, 14), ('2020-3-7'::timestamptz, 18), ('2020-3-8'::timestamptz, 29), ('2020-3-9'::timestamptz, 23), ('2020-3-10'::timestamptz, 27), ('2020-3-11'::timestamptz, 14) ) AS v(ts, val)"#, None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-01 00:00:00+00\",10)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-04 00:00:00+00\",32)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-05 00:00:00+00\",12)") ); assert_eq!( 
result.next().unwrap()[1].value().unwrap(), Some("(\"2020-02-06 00:00:00+00\",14)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-07 00:00:00+00\",18)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-08 00:00:00+00\",29)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-11 00:00:00+00\",14)") ); assert!(result.next().is_none()); }) } #[pg_test] fn test_gp_lttb_with_gap() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); let mut result = client .update( r#"SELECT unnest(toolkit_experimental.gp_lttb(ts, val, '36hr', 5))::TEXT FROM (VALUES ('2020-1-1'::timestamptz, 10), ('2020-1-2'::timestamptz, 21), ('2020-1-4'::timestamptz, 32), ('2020-1-5'::timestamptz, 12), ('2020-2-6'::timestamptz, 14), ('2020-3-7'::timestamptz, 18), ('2020-3-8'::timestamptz, 29), ('2020-3-10'::timestamptz, 27), ('2020-3-11'::timestamptz, 14) ) AS v(ts, val)"#, None, &[], ) .unwrap(); // This should include everything, despite target resolution of 5 assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-01 00:00:00+00\",10)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-02 00:00:00+00\",21)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-04 00:00:00+00\",32)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-05 00:00:00+00\",12)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-02-06 00:00:00+00\",14)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-07 00:00:00+00\",18)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-08 00:00:00+00\",29)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-10 00:00:00+00\",27)") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-03-11 00:00:00+00\",14)") ); assert!(result.next().is_none()); }) } } 
/// Converts the in-memory transition state into the flattened `MaxByFloats`
/// aggregate representation, keeping each retained value paired (by index)
/// with its associated datum in `data`.
impl<'input> From<MaxByFloatTransType> for MaxByFloats<'input> {
    fn from(item: MaxByFloatTransType) -> Self {
        // (capacity, sorted values, matching datums) — values and data share
        // indices after sorting.
        let (capacity, val_ary, data) = item.into_sorted_parts();
        unsafe {
            flatten!(MaxByFloats {
                values: build!(MaxFloats {
                    capacity: capacity as u32,
                    elements: val_ary.len() as u32,
                    values: val_ary
                        .into_iter()
                        // Unwrap Reverse<NotNan<f64>> (used for max-ordering
                        // in the heap) back to plain f64 for storage.
                        .map(|x| f64::from(x.0))
                        .collect::<Vec<_>>()
                        .into()
                })
                .0,
                data,
            })
        }
    }
}
max_n_by_float_to_values( agg: MaxByFloats<'static>, _dummy: Option, ) -> TableIterator<'static, (name!(value, f64), name!(data, AnyElement))> { TableIterator::new( agg.values .values .clone() .into_iter() .zip(agg.data.clone().into_anyelement_iter()), ) } extension_sql!( "\n\ CREATE AGGREGATE max_n_by(\n\ value double precision, data AnyElement, capacity bigint\n\ ) (\n\ sfunc = max_n_by_float_trans,\n\ stype = internal,\n\ finalfunc = max_n_by_float_final\n\ );\n\ ", name = "max_n_by_float", requires = [max_n_by_float_trans, max_n_by_float_final], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ MaxByFloats\n\ ) (\n\ sfunc = max_n_by_float_rollup_trans,\n\ stype = internal,\n\ finalfunc = max_n_by_float_final\n\ );\n\ ", name = "max_n_by_float_rollup", requires = [max_n_by_float_rollup_trans, min_n_by_float_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn max_by_float_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val DOUBLE PRECISION, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}.0/128, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_values let mut result = client .update( "SELECT into_values(max_n_by(val, data, 3), NULL::data)::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.7734375,\"(0.7734375,3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.765625,\"(0.765625,2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.7578125,\"(0.7578125,1)\")") ); assert!(result.next().is_none()); // Test rollup let mut result = client.update( "WITH aggs as (SELECT category, max_n_by(val, data, 5) as agg from data GROUP BY category) SELECT into_values(rollup(agg), 
/// Converts the in-memory transition state into the flattened `MaxByInts`
/// aggregate representation, keeping each retained value paired (by index)
/// with its associated datum in `data`.
impl<'input> From<MaxByIntTransType> for MaxByInts<'input> {
    fn from(item: MaxByIntTransType) -> Self {
        // (capacity, sorted values, matching datums) — values and data share
        // indices after sorting.
        let (capacity, val_ary, data) = item.into_sorted_parts();
        unsafe {
            flatten!(MaxByInts {
                values: build!(MaxInts {
                    capacity: capacity as u32,
                    elements: val_ary.len() as u32,
                    values: val_ary
                        .into_iter()
                        // Strip the Reverse wrapper used for max-ordering.
                        .map(|x| x.0)
                        .collect::<Vec<_>>()
                        .into()
                })
                .0,
                data,
            })
        }
    }
}
.values .clone() .into_iter() .map(Reverse) .collect(); nmost_by_rollup_trans_function( unsafe { state.to_inner::() }, &values, &value.data, value.values.capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn max_n_by_int_final(state: Internal) -> MaxByInts<'static> { unsafe { state.to_inner::().unwrap().clone() }.into() } #[pg_extern(name = "into_values", immutable, parallel_safe)] pub fn max_n_by_int_to_values( agg: MaxByInts<'static>, _dummy: Option, ) -> TableIterator<'static, (name!(value, i64), name!(data, AnyElement))> { TableIterator::new( agg.values .values .clone() .into_iter() .zip(agg.data.clone().into_anyelement_iter()), ) } extension_sql!( "\n\ CREATE AGGREGATE max_n_by(\n\ value bigint, data AnyElement, capacity bigint\n\ ) (\n\ sfunc = max_n_by_int_trans,\n\ stype = internal,\n\ finalfunc = max_n_by_int_final\n\ );\n\ ", name = "max_n_by_int", requires = [max_n_by_int_trans, max_n_by_int_final], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ MaxByInts\n\ ) (\n\ sfunc = max_n_by_int_rollup_trans,\n\ stype = internal,\n\ finalfunc = max_n_by_int_final\n\ );\n\ ", name = "max_n_by_int_rollup", requires = [max_n_by_int_rollup_trans, min_n_by_int_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn max_by_int_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update("CREATE TABLE data(val INT8, category INT)", None, &[]) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_values let mut result = client .update( "SELECT into_values(max_n_by(val, data, 3), NULL::data)::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(99,\"(99,3)\")") ); assert_eq!( 
result.next().unwrap()[1].value().unwrap(), Some("(98,\"(98,2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(97,\"(97,1)\")") ); assert!(result.next().is_none()); // Test rollup let mut result = client.update( "WITH aggs as (SELECT category, max_n_by(val, data, 5) as agg from data GROUP BY category) SELECT into_values(rollup(agg), NULL::data)::TEXT FROM aggs", None, &[], ).unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(99,\"(99,3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(98,\"(98,2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(97,\"(97,1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(96,\"(96,0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(95,\"(95,3)\")") ); assert!(result.next().is_none()); }) } } ================================================ FILE: extension/src/nmost/max_by_time.rs ================================================ use pgrx::{iter::TableIterator, *}; use crate::nmost::max_time::*; use crate::nmost::*; use crate::{ build, flatten, palloc::{Internal, InternalAsValue, ToInternal}, pg_type, ron_inout_funcs, }; use std::cmp::Reverse; type MaxByTimeTransType = NMostByTransState>; pg_type! 
/// Converts the in-memory transition state into the flattened `MaxByTimes`
/// aggregate representation, keeping each retained timestamp paired (by
/// index) with its associated datum in `data`.
impl<'input> From<MaxByTimeTransType> for MaxByTimes<'input> {
    fn from(item: MaxByTimeTransType) -> Self {
        // (capacity, sorted values, matching datums) — values and data share
        // indices after sorting.
        let (capacity, val_ary, data) = item.into_sorted_parts();
        unsafe {
            flatten!(MaxByTimes {
                values: build!(MaxTimes {
                    capacity: capacity as u32,
                    elements: val_ary.len() as u32,
                    values: val_ary
                        .into_iter()
                        // Strip the Reverse wrapper used for max-ordering.
                        .map(|x| x.0)
                        .collect::<Vec<_>>()
                        .into()
                })
                .0,
                data,
            })
        }
    }
}
max_n_by_time_final\n\ );\n\ ", name = "max_n_by_time", requires = [max_n_by_time_trans, max_n_by_time_final], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ MaxByTimes\n\ ) (\n\ sfunc = max_n_by_time_rollup_trans,\n\ stype = internal,\n\ finalfunc = max_n_by_time_final\n\ );\n\ ", name = "max_n_by_time_rollup", requires = [max_n_by_time_rollup_trans, min_n_by_time_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn max_by_time_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val TIMESTAMPTZ, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client.update( &format!("INSERT INTO data VALUES ('2020-1-1 UTC'::timestamptz + {} * '1d'::interval, {})", i, i % 4), None, &[] ).unwrap(); } // Test into_values let mut result = client .update( "SELECT into_values(max_n_by(val, data, 3), NULL::data)::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-04-09 00:00:00+00\",\"(\"\"2020-04-09 00:00:00+00\"\",3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-04-08 00:00:00+00\",\"(\"\"2020-04-08 00:00:00+00\"\",2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-04-07 00:00:00+00\",\"(\"\"2020-04-07 00:00:00+00\"\",1)\")") ); assert!(result.next().is_none()); // Test rollup let mut result = client.update( "WITH aggs as (SELECT category, max_n_by(val, data, 5) as agg from data GROUP BY category) SELECT into_values(rollup(agg), NULL::data)::TEXT FROM aggs", None, &[], ).unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-04-09 00:00:00+00\",\"(\"\"2020-04-09 00:00:00+00\"\",3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-04-08 00:00:00+00\",\"(\"\"2020-04-08 
/// Converts the transition state into the flattened `MaxFloats` form.
/// Takes `&mut` (rather than ownership) so the final function can reuse the
/// state pointer; the heap is moved out and replaced with an empty one.
impl<'input> From<&mut MaxFloatTransType> for MaxFloats<'input> {
    fn from(item: &mut MaxFloatTransType) -> Self {
        // Move the heap out of the state without cloning; leaves an empty
        // (Default) heap behind.
        let heap = std::mem::take(&mut item.heap);
        unsafe {
            flatten!(MaxFloats {
                capacity: item.capacity as u32,
                elements: heap.len() as u32,
                values: heap
                    .into_sorted_vec()
                    .into_iter()
                    // Unwrap Reverse<NotNan<f64>> back to plain f64.
                    .map(|x| f64::from(x.0))
                    .collect::<Vec<_>>()
                    .into()
            })
        }
    }
}
.collect(); nmost_rollup_trans_function( unsafe { state.to_inner::() }, &values, value.capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn max_n_float_combine( state1: Internal, state2: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_trans_combine( unsafe { state1.to_inner::() }, unsafe { state2.to_inner::() }, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn max_n_float_serialize(state: Internal) -> bytea { let state: Inner = unsafe { state.to_inner().unwrap() }; crate::do_serialize!(state) } #[pg_extern(immutable, parallel_safe)] pub fn max_n_float_deserialize(bytes: bytea, _internal: Internal) -> Option { let i: MaxFloatTransType = crate::do_deserialize!(bytes, MaxFloatTransType); Internal::new(i).into() } #[pg_extern(immutable, parallel_safe)] pub fn max_n_float_final(state: Internal) -> MaxFloats<'static> { unsafe { &mut *state.to_inner::().unwrap() }.into() } #[pg_extern(name = "into_array", immutable, parallel_safe)] pub fn max_n_float_to_array(agg: MaxFloats<'static>) -> Vec { agg.values.clone().into_vec() } #[pg_extern(name = "into_values", immutable, parallel_safe)] pub fn max_n_float_to_values(agg: MaxFloats<'static>) -> SetOfIterator<'static, f64> { SetOfIterator::new(agg.values.clone().into_iter()) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_max_float_into_values( agg: MaxFloats<'static>, _accessor: AccessorIntoValues, ) -> SetOfIterator<'static, f64> { max_n_float_to_values(agg) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_max_float_into_array( agg: MaxFloats<'static>, _accessor: AccessorIntoArray, ) -> Vec { max_n_float_to_array(agg) } extension_sql!( "\n\ CREATE AGGREGATE max_n(\n\ value double precision, capacity bigint\n\ ) (\n\ sfunc = max_n_float_trans,\n\ stype = internal,\n\ combinefunc = max_n_float_combine,\n\ parallel = safe,\n\ serialfunc = max_n_float_serialize,\n\ deserialfunc = max_n_float_deserialize,\n\ 
finalfunc = max_n_float_final\n\ );\n\ ", name = "max_n_float", requires = [ max_n_float_trans, max_n_float_final, max_n_float_combine, max_n_float_serialize, max_n_float_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ value MaxFloats\n\ ) (\n\ sfunc = max_n_float_rollup_trans,\n\ stype = internal,\n\ combinefunc = max_n_float_combine,\n\ parallel = safe,\n\ serialfunc = max_n_float_serialize,\n\ deserialfunc = max_n_float_deserialize,\n\ finalfunc = max_n_float_final\n\ );\n\ ", name = "max_n_float_rollup", requires = [ max_n_float_rollup_trans, max_n_float_final, max_n_float_combine, max_n_float_serialize, max_n_float_deserialize ], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn max_float_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val DOUBLE PRECISION, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}.0/128, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_array let result = client .update("SELECT into_array(max_n(val, 5)) from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!( result.unwrap(), vec![99. / 128., 98. / 128., 97. / 128., 96. / 128., 95. / 128.] ); let result = client .update("SELECT max_n(val, 5)->into_array() from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!( result.unwrap(), vec![99. / 128., 98. / 128., 97. / 128., 96. / 128., 95. / 128.] 
); // Test into_values let mut result = client .update( "SELECT into_values(max_n(val, 3))::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("0.7734375") ); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0.765625")); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("0.7578125") ); assert!(result.next().is_none()); let mut result = client .update( "SELECT (max_n(val, 3)->into_values())::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("0.7734375") ); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0.765625")); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("0.7578125") ); assert!(result.next().is_none()); // Test rollup let result = client.update( "WITH aggs as (SELECT category, max_n(val, 5) as agg from data GROUP BY category) SELECT into_array(rollup(agg)) FROM aggs", None, &[], ).unwrap().first().get_one::>().unwrap(); assert_eq!( result.unwrap(), vec![99. / 128., 98. / 128., 97. / 128., 96. / 128., 95. / 128.] ); }) } } ================================================ FILE: extension/src/nmost/max_int.rs ================================================ use pgrx::{iter::SetOfIterator, *}; use crate::nmost::*; use crate::{ accessors::{AccessorIntoArray, AccessorIntoValues}, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, raw::bytea, ron_inout_funcs, }; use std::cmp::Reverse; type MaxIntTransType = NMostTransState>; pg_type! 
/// Converts the transition state into the flattened `MaxInts` form.
/// Takes `&mut` (rather than ownership) so the final function can reuse the
/// state pointer; the heap is moved out and replaced with an empty one.
impl<'input> From<&mut MaxIntTransType> for MaxInts<'input> {
    fn from(item: &mut MaxIntTransType) -> Self {
        // Move the heap out of the state without cloning; leaves an empty
        // (Default) heap behind.
        let heap = std::mem::take(&mut item.heap);
        unsafe {
            flatten!(MaxInts {
                capacity: item.capacity as u32,
                elements: heap.len() as u32,
                values: heap
                    .into_sorted_vec()
                    .into_iter()
                    // Strip the Reverse wrapper used for max-ordering.
                    .map(|x| x.0)
                    .collect::<Vec<_>>()
                    .into()
            })
        }
    }
}
MaxInts<'static>) -> Vec { agg.values.clone().into_vec() } #[pg_extern(name = "into_values", immutable, parallel_safe)] pub fn max_n_int_to_values(agg: MaxInts<'static>) -> SetOfIterator<'static, i64> { SetOfIterator::new(agg.values.clone().into_iter()) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_max_int_into_values( agg: MaxInts<'static>, _accessor: AccessorIntoValues, ) -> SetOfIterator<'static, i64> { max_n_int_to_values(agg) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_max_int_into_array(agg: MaxInts<'static>, _accessor: AccessorIntoArray) -> Vec { max_n_int_to_array(agg) } extension_sql!( "\n\ CREATE AGGREGATE max_n(\n\ value bigint, capacity bigint\n\ ) (\n\ sfunc = max_n_int_trans,\n\ stype = internal,\n\ combinefunc = max_n_int_combine,\n\ parallel = safe,\n\ serialfunc = max_n_int_serialize,\n\ deserialfunc = max_n_int_deserialize,\n\ finalfunc = max_n_int_final\n\ );\n\ ", name = "max_n_int", requires = [ max_n_int_trans, max_n_int_final, max_n_int_combine, max_n_int_serialize, max_n_int_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ value MaxInts\n\ ) (\n\ sfunc = max_n_int_rollup_trans,\n\ stype = internal,\n\ combinefunc = max_n_int_combine,\n\ parallel = safe,\n\ serialfunc = max_n_int_serialize,\n\ deserialfunc = max_n_int_deserialize,\n\ finalfunc = max_n_int_final\n\ );\n\ ", name = "max_n_int_rollup", requires = [ max_n_int_rollup_trans, max_n_int_final, max_n_int_combine, max_n_int_serialize, max_n_int_deserialize ], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn max_int_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update("CREATE TABLE data(val INT8, category INT)", None, &[]) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}, {})", i, i % 4), 
None, &[], ) .unwrap(); } // Test into_array let result = client .update("SELECT into_array(max_n(val, 5)) from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!(result.unwrap(), vec![99, 98, 97, 96, 95]); let result = client .update("SELECT max_n(val, 5)->into_array() from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!(result.unwrap(), vec![99, 98, 97, 96, 95]); // Test into_values let mut result = client .update( "SELECT into_values(max_n(val, 3))::TEXT from data", None, &[], ) .unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("99")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("98")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("97")); assert!(result.next().is_none()); let mut result = client .update( "SELECT (max_n(val, 3)->into_values())::TEXT from data", None, &[], ) .unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("99")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("98")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("97")); assert!(result.next().is_none()); // Test rollup let result = client.update( "WITH aggs as (SELECT category, max_n(val, 5) as agg from data GROUP BY category) SELECT into_array(rollup(agg)) FROM aggs", None, &[], ).unwrap().first().get_one::>().unwrap(); assert_eq!(result.unwrap(), vec![99, 98, 97, 96, 95]); }) } } ================================================ FILE: extension/src/nmost/max_time.rs ================================================ use pgrx::{iter::SetOfIterator, *}; use crate::nmost::*; use crate::{ accessors::{AccessorIntoArray, AccessorIntoValues}, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, raw::bytea, ron_inout_funcs, }; use std::cmp::Reverse; type MaxTimeTransType = NMostTransState>; pg_type! 
{
    #[derive(Debug)]
    struct MaxTimes<'input> {
        capacity: u32,
        elements: u32,
        values: [pg_sys::TimestampTz; self.elements],
    }
}

ron_inout_funcs!(MaxTimes<'input>);

// Freeze a finished transient state into the flattened MaxTimes form.
// The heap stores `Reverse`-wrapped timestamps (a min-heap of `Reverse` is a
// max-N tracker), so `into_sorted_vec` yields ascending `Reverse`s, i.e. the
// timestamps in descending order — the order the accessors emit.
impl<'input> From<&mut MaxTimeTransType> for MaxTimes<'input> {
    fn from(item: &mut MaxTimeTransType) -> Self {
        let heap = std::mem::take(&mut item.heap);
        unsafe {
            flatten!(MaxTimes {
                capacity: item.capacity as u32,
                elements: heap.len() as u32,
                values: heap
                    .into_sorted_vec()
                    .into_iter()
                    .map(|x| x.0) // unwrap the `Reverse` ordering adapter
                    .collect::<Vec<pg_sys::TimestampTz>>()
                    .into()
            })
        }
    }
}

/// Aggregate transition function: fold one timestamp into the state.
#[pg_extern(immutable, parallel_safe)]
pub fn max_n_time_trans(
    state: Internal,
    value: crate::raw::TimestampTz,
    capacity: i64,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_trans_function(
        unsafe { state.to_inner::<MaxTimeTransType>() },
        Reverse(value.into()),
        capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Transition function for `rollup`: merge a finished MaxTimes aggregate
/// into the transient state.
#[pg_extern(immutable, parallel_safe)]
pub fn max_n_time_rollup_trans(
    state: Internal,
    value: MaxTimes<'static>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    let values: Vec<Reverse<pg_sys::TimestampTz>> =
        value.values.clone().into_iter().map(Reverse).collect();
    nmost_rollup_trans_function(
        unsafe { state.to_inner::<MaxTimeTransType>() },
        &values,
        value.capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Combine two partial states (parallel aggregation support).
#[pg_extern(immutable, parallel_safe)]
pub fn max_n_time_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_trans_combine(
        unsafe { state1.to_inner::<MaxTimeTransType>() },
        unsafe { state2.to_inner::<MaxTimeTransType>() },
        fcinfo,
    )
    .internal()
}

/// Serialize the transient state for transfer between parallel workers.
#[pg_extern(immutable, parallel_safe)]
pub fn max_n_time_serialize(state: Internal) -> bytea {
    let state: Inner<MaxTimeTransType> = unsafe { state.to_inner().unwrap() };
    crate::do_serialize!(state)
}

/// Deserialize a state produced by `max_n_time_serialize`.
#[pg_extern(immutable, parallel_safe)]
pub fn max_n_time_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    let i: MaxTimeTransType = crate::do_deserialize!(bytes, MaxTimeTransType);
    Internal::new(i).into()
}

/// Final function: convert the transient state into a MaxTimes value.
#[pg_extern(immutable, parallel_safe)]
pub fn max_n_time_final(state: Internal) -> MaxTimes<'static> {
    unsafe { &mut *state.to_inner::<MaxTimeTransType>().unwrap() }.into()
}

#[pg_extern(name = "into_array",
immutable, parallel_safe)]
pub fn max_n_time_to_array(agg: MaxTimes<'static>) -> Vec<crate::raw::TimestampTz> {
    // Values are already stored newest-first; just rewrap for SQL.
    agg.values
        .clone()
        .into_iter()
        .map(crate::raw::TimestampTz::from)
        .collect()
}

/// Emit the stored timestamps as a set of rows, newest first.
#[pg_extern(name = "into_values", immutable, parallel_safe)]
pub fn max_n_time_to_values(
    agg: MaxTimes<'static>,
) -> SetOfIterator<'static, crate::raw::TimestampTz> {
    SetOfIterator::new(
        agg.values
            .clone()
            .into_iter()
            .map(crate::raw::TimestampTz::from),
    )
}

// Arrow (`->`) accessor wrappers delegating to the named functions above.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_max_time_into_values(
    agg: MaxTimes<'static>,
    _accessor: AccessorIntoValues,
) -> SetOfIterator<'static, crate::raw::TimestampTz> {
    max_n_time_to_values(agg)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_max_time_into_array(
    agg: MaxTimes<'static>,
    _accessor: AccessorIntoArray,
) -> Vec<crate::raw::TimestampTz> {
    max_n_time_to_array(agg)
}

extension_sql!(
    "\n\
    CREATE AGGREGATE max_n(\n\
        value timestamptz, capacity bigint\n\
    ) (\n\
        sfunc = max_n_time_trans,\n\
        stype = internal,\n\
        combinefunc = max_n_time_combine,\n\
        parallel = safe,\n\
        serialfunc = max_n_time_serialize,\n\
        deserialfunc = max_n_time_deserialize,\n\
        finalfunc = max_n_time_final\n\
    );\n\
",
    name = "max_n_time",
    requires = [
        max_n_time_trans,
        max_n_time_final,
        max_n_time_combine,
        max_n_time_serialize,
        max_n_time_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE rollup(\n\
        value MaxTimes\n\
    ) (\n\
        sfunc = max_n_time_rollup_trans,\n\
        stype = internal,\n\
        combinefunc = max_n_time_combine,\n\
        parallel = safe,\n\
        serialfunc = max_n_time_serialize,\n\
        deserialfunc = max_n_time_deserialize,\n\
        finalfunc = max_n_time_final\n\
    );\n\
",
    name = "max_n_time_rollup",
    requires = [
        max_n_time_rollup_trans,
        max_n_time_final,
        max_n_time_combine,
        max_n_time_serialize,
        max_n_time_deserialize
    ],
);

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use super::*;
    use pgrx_macros::pg_test;

    #[pg_test]
    fn max_time_correctness() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
client .update( "CREATE TABLE data(val TIMESTAMPTZ, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client.update( &format!("INSERT INTO data VALUES ('2020-1-1 UTC'::timestamptz + {} * '1d'::interval, {})", i, i % 4), None, &[] ).unwrap(); } // Test into_array let result = client .update( "SELECT into_array(max_n(val, 5))::TEXT from data", None, &[], ) .unwrap() .first() .get_one::<&str>() .unwrap(); assert_eq!(result.unwrap(), "{\"2020-04-09 00:00:00+00\",\"2020-04-08 00:00:00+00\",\"2020-04-07 00:00:00+00\",\"2020-04-06 00:00:00+00\",\"2020-04-05 00:00:00+00\"}"); let result = client .update( "SELECT (max_n(val, 5)->into_array())::TEXT from data", None, &[], ) .unwrap() .first() .get_one::<&str>() .unwrap(); assert_eq!(result.unwrap(), "{\"2020-04-09 00:00:00+00\",\"2020-04-08 00:00:00+00\",\"2020-04-07 00:00:00+00\",\"2020-04-06 00:00:00+00\",\"2020-04-05 00:00:00+00\"}"); // Test into_values let mut result = client .update( "SELECT into_values(max_n(val, 3))::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-04-09 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-04-08 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-04-07 00:00:00+00") ); assert!(result.next().is_none()); let mut result = client .update( "SELECT (max_n(val, 3)->into_values())::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-04-09 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-04-08 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-04-07 00:00:00+00") ); assert!(result.next().is_none()); // Test rollup let result = client.update( "WITH aggs as (SELECT category, max_n(val, 5) as agg from data GROUP BY category) SELECT into_array(rollup(agg))::TEXT FROM aggs", None, &[], 
).unwrap().first().get_one::<&str>().unwrap(); assert_eq!(result.unwrap(), "{\"2020-04-09 00:00:00+00\",\"2020-04-08 00:00:00+00\",\"2020-04-07 00:00:00+00\",\"2020-04-06 00:00:00+00\",\"2020-04-05 00:00:00+00\"}"); }) } } ================================================ FILE: extension/src/nmost/min_by_float.rs ================================================ use pgrx::{iter::TableIterator, *}; use crate::nmost::min_float::*; use crate::nmost::*; use crate::{ build, flatten, palloc::{Internal, InternalAsValue, ToInternal}, pg_type, ron_inout_funcs, }; use ordered_float::NotNan; type MinByFloatTransType = NMostByTransState>; pg_type! { #[derive(Debug)] struct MinByFloats<'input> { values: MinFloatsData<'input>, // Nesting pg_types adds 8 bytes of header data: DatumStore<'input>, } } ron_inout_funcs!(MinByFloats<'input>); impl<'input> From for MinByFloats<'input> { fn from(item: MinByFloatTransType) -> Self { let (capacity, val_ary, data) = item.into_sorted_parts(); unsafe { flatten!(MinByFloats { values: build!(MinFloats { capacity: capacity as u32, elements: val_ary.len() as u32, values: val_ary .into_iter() .map(f64::from) .collect::>() .into() }) .0, data, }) } } } #[pg_extern(immutable, parallel_safe)] pub fn min_n_by_float_trans( state: Internal, value: f64, data: AnyElement, capacity: i64, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_by_trans_function( unsafe { state.to_inner::() }, NotNan::new(value).unwrap(), data, capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_by_float_rollup_trans( state: Internal, value: MinByFloats<'static>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { let values: Vec> = value .values .values .clone() .into_iter() .map(|x| NotNan::new(x).unwrap()) .collect(); nmost_by_rollup_trans_function( unsafe { state.to_inner::() }, &values, &value.data, value.values.capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_by_float_final(state: 
Internal) -> MinByFloats<'static> { unsafe { state.to_inner::().unwrap().clone() }.into() } #[pg_extern(name = "into_values", immutable, parallel_safe)] pub fn min_n_by_float_to_values( agg: MinByFloats<'static>, _dummy: Option, ) -> TableIterator<'static, (name!(value, f64), name!(data, AnyElement))> { TableIterator::new( agg.values .values .clone() .into_iter() .zip(agg.data.clone().into_anyelement_iter()), ) } extension_sql!( "\n\ CREATE AGGREGATE min_n_by(\n\ value double precision, data AnyElement, capacity bigint\n\ ) (\n\ sfunc = min_n_by_float_trans,\n\ stype = internal,\n\ finalfunc = min_n_by_float_final\n\ );\n\ ", name = "min_n_by_float", requires = [min_n_by_float_trans, min_n_by_float_final], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ MinByFloats\n\ ) (\n\ sfunc = min_n_by_float_rollup_trans,\n\ stype = internal,\n\ finalfunc = min_n_by_float_final\n\ );\n\ ", name = "min_n_by_float_rollup", requires = [min_n_by_float_rollup_trans, min_n_by_float_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn min_by_float_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val DOUBLE PRECISION, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}.0/128, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_values let mut result = client .update( "SELECT into_values(min_n_by(val, data, 3), NULL::data)::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0,\"(0,0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.0078125,\"(0.0078125,1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.015625,\"(0.015625,2)\")") ); assert!(result.next().is_none()); // Test rollup let mut 
result = client.update( "WITH aggs as (SELECT category, min_n_by(val, data, 5) as agg from data GROUP BY category) SELECT into_values(rollup(agg), NULL::data)::TEXT FROM aggs", None, &[], ).unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0,\"(0,0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.0078125,\"(0.0078125,1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.015625,\"(0.015625,2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.0234375,\"(0.0234375,3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0.03125,\"(0.03125,0)\")") ); assert!(result.next().is_none()); }) } } ================================================ FILE: extension/src/nmost/min_by_int.rs ================================================ use pgrx::{iter::TableIterator, *}; use crate::nmost::min_int::*; use crate::nmost::*; use crate::{ build, flatten, palloc::{Internal, InternalAsValue, ToInternal}, pg_type, ron_inout_funcs, }; type MinByIntTransType = NMostByTransState; pg_type! 
{ #[derive(Debug)] struct MinByInts<'input> { values: MinIntsData<'input>, // Nesting pg_types adds 8 bytes of header data: DatumStore<'input>, } } ron_inout_funcs!(MinByInts<'input>); impl<'input> From for MinByInts<'input> { fn from(item: MinByIntTransType) -> Self { let (capacity, val_ary, data) = item.into_sorted_parts(); unsafe { flatten!(MinByInts { values: build!(MinInts { capacity: capacity as u32, elements: val_ary.len() as u32, values: val_ary.into() }) .0, data, }) } } } #[pg_extern(immutable, parallel_safe)] pub fn min_n_by_int_trans( state: Internal, value: i64, data: AnyElement, capacity: i64, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_by_trans_function( unsafe { state.to_inner::() }, value, data, capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_by_int_rollup_trans( state: Internal, value: MinByInts<'static>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_by_rollup_trans_function( unsafe { state.to_inner::() }, value.values.values.as_slice(), &value.data, value.values.capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_by_int_final(state: Internal) -> MinByInts<'static> { unsafe { state.to_inner::().unwrap().clone() }.into() } #[pg_extern(name = "into_values", immutable, parallel_safe)] pub fn min_n_by_int_to_values( agg: MinByInts<'static>, _dummy: Option, ) -> TableIterator<'static, (name!(value, i64), name!(data, AnyElement))> { TableIterator::new( agg.values .values .clone() .into_iter() .zip(agg.data.clone().into_anyelement_iter()), ) } extension_sql!( "\n\ CREATE AGGREGATE min_n_by(\n\ value bigint, data AnyElement, capacity bigint\n\ ) (\n\ sfunc = min_n_by_int_trans,\n\ stype = internal,\n\ finalfunc = min_n_by_int_final\n\ );\n\ ", name = "min_n_by_int", requires = [min_n_by_int_trans, min_n_by_int_final], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ MinByInts\n\ ) (\n\ sfunc = min_n_by_int_rollup_trans,\n\ stype = internal,\n\ 
finalfunc = min_n_by_int_final\n\ );\n\ ", name = "min_n_by_int_rollup", requires = [min_n_by_int_rollup_trans, min_n_by_int_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn min_by_int_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update("CREATE TABLE data(val INT8, category INT)", None, &[]) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_values let mut result = client .update( "SELECT into_values(min_n_by(val, data, 3), NULL::data)::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0,\"(0,0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(1,\"(1,1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(2,\"(2,2)\")") ); assert!(result.next().is_none()); // Test rollup let mut result = client.update( "WITH aggs as (SELECT category, min_n_by(val, data, 5) as agg from data GROUP BY category) SELECT into_values(rollup(agg), NULL::data)::TEXT FROM aggs", None, &[], ).unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(0,\"(0,0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(1,\"(1,1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(2,\"(2,2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(3,\"(3,3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(4,\"(4,0)\")") ); assert!(result.next().is_none()); }) } } ================================================ FILE: extension/src/nmost/min_by_time.rs ================================================ use pgrx::{iter::TableIterator, *}; use crate::nmost::min_time::*; use crate::nmost::*; use crate::{ build, flatten, 
palloc::{Internal, InternalAsValue, ToInternal},
    pg_type, ron_inout_funcs,
};

type MinByTimeTransType = NMostByTransState<pg_sys::TimestampTz>;

pg_type! {
    #[derive(Debug)]
    struct MinByTimes<'input> {
        values: MinTimesData<'input>, // Nesting pg_types adds 8 bytes of header
        data: DatumStore<'input>,
    }
}

ron_inout_funcs!(MinByTimes<'input>);

// Freeze a finished transient state: the timestamps become a sorted MinTimes
// payload, the associated rows travel alongside in a DatumStore.
impl<'input> From<MinByTimeTransType> for MinByTimes<'input> {
    fn from(item: MinByTimeTransType) -> Self {
        let (capacity, val_ary, data) = item.into_sorted_parts();
        unsafe {
            flatten!(MinByTimes {
                values: build!(MinTimes {
                    capacity: capacity as u32,
                    elements: val_ary.len() as u32,
                    values: val_ary.into()
                })
                .0,
                data,
            })
        }
    }
}

/// Aggregate transition function: fold one (timestamp, data) pair into the state.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_by_time_trans(
    state: Internal,
    value: crate::raw::TimestampTz,
    data: AnyElement,
    capacity: i64,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_by_trans_function(
        unsafe { state.to_inner::<MinByTimeTransType>() },
        value.into(),
        data,
        capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Transition function for `rollup`: merge a finished MinByTimes aggregate.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_by_time_rollup_trans(
    state: Internal,
    value: MinByTimes<'static>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_by_rollup_trans_function(
        unsafe { state.to_inner::<MinByTimeTransType>() },
        value.values.values.as_slice(),
        &value.data,
        value.values.capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Final function: convert the transient state into a MinByTimes value.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_by_time_final(state: Internal) -> MinByTimes<'static> {
    unsafe { state.to_inner::<MinByTimeTransType>().unwrap().clone() }.into()
}

/// Emit (value, data) rows, earliest timestamp first. `_dummy` only pins
/// the polymorphic output row type for the planner.
#[pg_extern(name = "into_values", immutable, parallel_safe)]
pub fn min_n_by_time_to_values(
    agg: MinByTimes<'static>,
    _dummy: Option<AnyElement>,
) -> TableIterator<
    'static,
    (
        name!(value, crate::raw::TimestampTz),
        name!(data, AnyElement),
    ),
> {
    TableIterator::new(
        agg.values
            .values
            .clone()
            .into_iter()
            .map(crate::raw::TimestampTz::from)
            .zip(agg.data.clone().into_anyelement_iter()),
    )
}

extension_sql!(
    "\n\
    CREATE AGGREGATE min_n_by(\n\
        value timestamptz, data AnyElement, capacity bigint\n\
    ) (\n\
        sfunc = min_n_by_time_trans,\n\
        stype = internal,\n\
        finalfunc =
min_n_by_time_final\n\ );\n\ ", name = "min_n_by_time", requires = [min_n_by_time_trans, min_n_by_time_final], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ MinByTimes\n\ ) (\n\ sfunc = min_n_by_time_rollup_trans,\n\ stype = internal,\n\ finalfunc = min_n_by_time_final\n\ );\n\ ", name = "min_n_by_time_rollup", requires = [min_n_by_time_rollup_trans, min_n_by_time_final], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn min_by_time_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val TIMESTAMPTZ, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client.update( &format!("INSERT INTO data VALUES ('2020-1-1 UTC'::timestamptz + {} * '1d'::interval, {})", i, i % 4), None, &[] ).unwrap(); } // Test into_values let mut result = client .update( "SELECT into_values(min_n_by(val, data, 3), NULL::data)::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-01 00:00:00+00\",\"(\"\"2020-01-01 00:00:00+00\"\",0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-02 00:00:00+00\",\"(\"\"2020-01-02 00:00:00+00\"\",1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-03 00:00:00+00\",\"(\"\"2020-01-03 00:00:00+00\"\",2)\")") ); assert!(result.next().is_none()); // Test rollup let mut result = client.update( "WITH aggs as (SELECT category, min_n_by(val, data, 5) as agg from data GROUP BY category) SELECT into_values(rollup(agg), NULL::data)::TEXT FROM aggs", None, &[], ).unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-01 00:00:00+00\",\"(\"\"2020-01-01 00:00:00+00\"\",0)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-02 00:00:00+00\",\"(\"\"2020-01-02 
00:00:00+00\"\",1)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-03 00:00:00+00\",\"(\"\"2020-01-03 00:00:00+00\"\",2)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-04 00:00:00+00\",\"(\"\"2020-01-04 00:00:00+00\"\",3)\")") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("(\"2020-01-05 00:00:00+00\",\"(\"\"2020-01-05 00:00:00+00\"\",0)\")") ); assert!(result.next().is_none()); }) } } ================================================ FILE: extension/src/nmost/min_float.rs ================================================ use pgrx::{iter::SetOfIterator, *}; use crate::nmost::*; use crate::{ accessors::{AccessorIntoArray, AccessorIntoValues}, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, raw::bytea, ron_inout_funcs, }; use ordered_float::NotNan; type MinFloatTransType = NMostTransState>; pg_type! { #[derive(Debug)] struct MinFloats <'input> { capacity : u32, elements : u32, values : [f64; self.elements], } } ron_inout_funcs!(MinFloats<'input>); impl<'input> From<&mut MinFloatTransType> for MinFloats<'input> { fn from(item: &mut MinFloatTransType) -> Self { let heap = std::mem::take(&mut item.heap); unsafe { flatten!(MinFloats { capacity: item.capacity as u32, elements: heap.len() as u32, values: heap .into_sorted_vec() .into_iter() .map(f64::from) .collect::>() .into() }) } } } #[pg_extern(immutable, parallel_safe)] pub fn min_n_float_trans( state: Internal, value: f64, capacity: i64, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_trans_function( unsafe { state.to_inner::() }, NotNan::new(value).unwrap(), capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_float_rollup_trans( state: Internal, value: MinFloats<'static>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { let values: Vec> = value .values .clone() .into_iter() .map(|x| NotNan::new(x).unwrap()) .collect(); nmost_rollup_trans_function( unsafe { 
state.to_inner::<MinFloatTransType>() },
        &values,
        value.capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Combine two partial states (parallel aggregation support).
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_float_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_trans_combine(
        unsafe { state1.to_inner::<MinFloatTransType>() },
        unsafe { state2.to_inner::<MinFloatTransType>() },
        fcinfo,
    )
    .internal()
}

/// Serialize the transient state for transfer between parallel workers.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_float_serialize(state: Internal) -> bytea {
    let state: Inner<MinFloatTransType> = unsafe { state.to_inner().unwrap() };
    crate::do_serialize!(state)
}

/// Deserialize a state produced by `min_n_float_serialize`.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_float_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    let i: MinFloatTransType = crate::do_deserialize!(bytes, MinFloatTransType);
    Internal::new(i).into()
}

/// Final function: convert the transient state into a MinFloats value.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_float_final(state: Internal) -> MinFloats<'static> {
    unsafe { &mut *state.to_inner::<MinFloatTransType>().unwrap() }.into()
}

/// Return the N smallest values, ascending, as an array.
#[pg_extern(name = "into_array", immutable, parallel_safe)]
pub fn min_n_float_to_array(agg: MinFloats<'static>) -> Vec<f64> {
    agg.values.clone().into_vec()
}

/// Return the N smallest values, ascending, as a set of rows.
#[pg_extern(name = "into_values", immutable, parallel_safe)]
pub fn min_n_float_to_values(agg: MinFloats<'static>) -> SetOfIterator<'static, f64> {
    SetOfIterator::new(agg.values.clone().into_iter())
}

// Arrow (`->`) accessor wrappers delegating to the named functions above.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_min_float_into_values(
    agg: MinFloats<'static>,
    _accessor: AccessorIntoValues,
) -> SetOfIterator<'static, f64> {
    min_n_float_to_values(agg)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_min_float_into_array(
    agg: MinFloats<'static>,
    _accessor: AccessorIntoArray,
) -> Vec<f64> {
    min_n_float_to_array(agg)
}

extension_sql!(
    "\n\
    CREATE AGGREGATE min_n(\n\
        value double precision, capacity bigint\n\
    ) (\n\
        sfunc = min_n_float_trans,\n\
        stype = internal,\n\
        combinefunc = min_n_float_combine,\n\
        parallel = safe,\n\
        serialfunc = min_n_float_serialize,\n\
        deserialfunc = min_n_float_deserialize,\n\
        finalfunc = min_n_float_final\n\
    );\n\
",
    name =
"min_n_float", requires = [ min_n_float_trans, min_n_float_final, min_n_float_combine, min_n_float_serialize, min_n_float_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ value MinFloats\n\ ) (\n\ sfunc = min_n_float_rollup_trans,\n\ stype = internal,\n\ combinefunc = min_n_float_combine,\n\ parallel = safe,\n\ serialfunc = min_n_float_serialize,\n\ deserialfunc = min_n_float_deserialize,\n\ finalfunc = min_n_float_final\n\ );\n\ ", name = "min_n_float_rollup", requires = [ min_n_float_rollup_trans, min_n_float_final, min_n_float_combine, min_n_float_serialize, min_n_float_deserialize ], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn min_float_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val DOUBLE PRECISION, category INT)", None, &[], ) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}.0/128, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_array let result = client .update("SELECT into_array(min_n(val, 5)) from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!( result.unwrap(), vec![0. / 128., 1. / 128., 2. / 128., 3. / 128., 4. / 128.] ); let result = client .update("SELECT min_n(val, 5)->into_array() from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!( result.unwrap(), vec![0. / 128., 1. / 128., 2. / 128., 3. / 128., 4. / 128.] 
); // Test into_values let mut result = client .update( "SELECT into_values(min_n(val, 3))::TEXT from data", None, &[], ) .unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0")); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("0.0078125") ); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0.015625")); assert!(result.next().is_none()); let mut result = client .update( "SELECT (min_n(val, 3)->into_values())::TEXT from data", None, &[], ) .unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0")); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("0.0078125") ); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0.015625")); assert!(result.next().is_none()); // Test rollup let result = client.update( "WITH aggs as (SELECT category, min_n(val, 5) as agg from data GROUP BY category) SELECT into_array(rollup(agg)) FROM aggs", None, &[], ).unwrap().first().get_one::>(); assert_eq!( result.unwrap().unwrap(), vec![0. / 128., 1. / 128., 2. / 128., 3. / 128., 4. / 128.] ); }) } } ================================================ FILE: extension/src/nmost/min_int.rs ================================================ use pgrx::{iter::SetOfIterator, *}; use crate::nmost::*; use crate::{ accessors::{AccessorIntoArray, AccessorIntoValues}, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, raw::bytea, ron_inout_funcs, }; type MinIntTransType = NMostTransState; pg_type! 
{
    #[derive(Debug)]
    struct MinInts<'input> {
        capacity: u32,
        elements: u32,
        values: [i64; self.elements],
    }
}

ron_inout_funcs!(MinInts<'input>);

// Freeze a finished transient state into the flattened MinInts form.
// `BinaryHeap::into_sorted_vec` yields ascending order, which is the
// output order for a min-N aggregate.
impl<'input> From<&mut MinIntTransType> for MinInts<'input> {
    fn from(item: &mut MinIntTransType) -> Self {
        let heap = std::mem::take(&mut item.heap);
        unsafe {
            flatten!(MinInts {
                capacity: item.capacity as u32,
                elements: heap.len() as u32,
                values: heap.into_sorted_vec().into()
            })
        }
    }
}

/// Aggregate transition function: fold one value into the state.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_int_trans(
    state: Internal,
    value: i64,
    capacity: i64,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_trans_function(
        unsafe { state.to_inner::<MinIntTransType>() },
        value,
        capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Transition function for `rollup`: merge a finished MinInts aggregate
/// into the transient state.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_int_rollup_trans(
    state: Internal,
    value: MinInts<'static>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_rollup_trans_function(
        unsafe { state.to_inner::<MinIntTransType>() },
        value.values.as_slice(),
        value.capacity as usize,
        fcinfo,
    )
    .internal()
}

/// Combine two partial states (parallel aggregation support).
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_int_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    nmost_trans_combine(
        unsafe { state1.to_inner::<MinIntTransType>() },
        unsafe { state2.to_inner::<MinIntTransType>() },
        fcinfo,
    )
    .internal()
}

/// Serialize the transient state for transfer between parallel workers.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_int_serialize(state: Internal) -> bytea {
    let state: Inner<MinIntTransType> = unsafe { state.to_inner().unwrap() };
    crate::do_serialize!(state)
}

/// Deserialize a state produced by `min_n_int_serialize`.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_int_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    let i: MinIntTransType = crate::do_deserialize!(bytes, MinIntTransType);
    Internal::new(i).into()
}

/// Final function: convert the transient state into a MinInts value.
#[pg_extern(immutable, parallel_safe)]
pub fn min_n_int_final(state: Internal) -> MinInts<'static> {
    unsafe { &mut *state.to_inner::<MinIntTransType>().unwrap() }.into()
}

/// Return the N smallest values, ascending, as an array.
#[pg_extern(name = "into_array", immutable, parallel_safe)]
pub fn min_n_int_to_array(agg: MinInts<'static>) -> Vec<i64> {
    agg.values.clone().into_vec()
}

#[pg_extern(name = "into_values", immutable,
parallel_safe)] pub fn min_n_int_to_values(agg: MinInts<'static>) -> SetOfIterator<'static, i64> { SetOfIterator::new(agg.values.clone().into_iter()) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_min_int_into_values( agg: MinInts<'static>, _accessor: AccessorIntoValues, ) -> SetOfIterator<'static, i64> { min_n_int_to_values(agg) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_min_int_into_array(agg: MinInts<'static>, _accessor: AccessorIntoArray) -> Vec { min_n_int_to_array(agg) } extension_sql!( "\n\ CREATE AGGREGATE min_n(\n\ value bigint, capacity bigint\n\ ) (\n\ sfunc = min_n_int_trans,\n\ stype = internal,\n\ combinefunc = min_n_int_combine,\n\ parallel = safe,\n\ serialfunc = min_n_int_serialize,\n\ deserialfunc = min_n_int_deserialize,\n\ finalfunc = min_n_int_final\n\ );\n\ ", name = "min_n_int", requires = [ min_n_int_trans, min_n_int_final, min_n_int_combine, min_n_int_serialize, min_n_int_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ value MinInts\n\ ) (\n\ sfunc = min_n_int_rollup_trans,\n\ stype = internal,\n\ combinefunc = min_n_int_combine,\n\ parallel = safe,\n\ serialfunc = min_n_int_serialize,\n\ deserialfunc = min_n_int_deserialize,\n\ finalfunc = min_n_int_final\n\ );\n\ ", name = "min_n_int_rollup", requires = [ min_n_int_rollup_trans, min_n_int_final, min_n_int_combine, min_n_int_serialize, min_n_int_deserialize ], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn min_int_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update("CREATE TABLE data(val INT8, category INT)", None, &[]) .unwrap(); for i in 0..100 { let i = (i * 83) % 100; // mess with the ordering just a little client .update( &format!("INSERT INTO data VALUES ({}, {})", i, i % 4), None, &[], ) .unwrap(); } // Test into_array let result = client .update("SELECT into_array(min_n(val, 
5)) from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!(result.unwrap(), vec![0, 1, 2, 3, 4]); let result = client .update("SELECT min_n(val, 5)->into_array() from data", None, &[]) .unwrap() .first() .get_one::>() .unwrap(); assert_eq!(result.unwrap(), vec![0, 1, 2, 3, 4]); // Test into_values let mut result = client .update( "SELECT into_values(min_n(val, 3))::TEXT from data", None, &[], ) .unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("1")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("2")); assert!(result.next().is_none()); let mut result = client .update( "SELECT (min_n(val, 3)->into_values())::TEXT from data", None, &[], ) .unwrap(); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("0")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("1")); assert_eq!(result.next().unwrap()[1].value().unwrap(), Some("2")); assert!(result.next().is_none()); // Test rollup let result = client.update( "WITH aggs as (SELECT category, min_n(val, 5) as agg from data GROUP BY category) SELECT into_array(rollup(agg)) FROM aggs", None, &[], ).unwrap().first().get_one::>().unwrap(); assert_eq!(result.unwrap(), vec![0, 1, 2, 3, 4]); }) } } ================================================ FILE: extension/src/nmost/min_time.rs ================================================ use pgrx::{iter::SetOfIterator, *}; use crate::nmost::*; use crate::{ accessors::{AccessorIntoArray, AccessorIntoValues}, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, raw::bytea, ron_inout_funcs, }; type MinTimeTransType = NMostTransState; pg_type! 
{ #[derive(Debug)] struct MinTimes <'input> { capacity : u32, elements : u32, values : [pg_sys::TimestampTz; self.elements], } } ron_inout_funcs!(MinTimes<'input>); impl<'input> From<&mut MinTimeTransType> for MinTimes<'input> { fn from(item: &mut MinTimeTransType) -> Self { let heap = std::mem::take(&mut item.heap); unsafe { flatten!(MinTimes { capacity: item.capacity as u32, elements: heap.len() as u32, values: heap.into_sorted_vec().into() }) } } } #[pg_extern(immutable, parallel_safe)] pub fn min_n_time_trans( state: Internal, value: crate::raw::TimestampTz, capacity: i64, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_trans_function( unsafe { state.to_inner::() }, value.into(), capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_time_rollup_trans( state: Internal, value: MinTimes<'static>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_rollup_trans_function( unsafe { state.to_inner::() }, value.values.as_slice(), value.capacity as usize, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_time_combine( state1: Internal, state2: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { nmost_trans_combine( unsafe { state1.to_inner::() }, unsafe { state2.to_inner::() }, fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_time_serialize(state: Internal) -> bytea { let state: Inner = unsafe { state.to_inner().unwrap() }; crate::do_serialize!(state) } #[pg_extern(immutable, parallel_safe)] pub fn min_n_time_deserialize(bytes: bytea, _internal: Internal) -> Option { let i: MinTimeTransType = crate::do_deserialize!(bytes, MinTimeTransType); Internal::new(i).into() } #[pg_extern(immutable, parallel_safe)] pub fn min_n_time_final(state: Internal) -> MinTimes<'static> { unsafe { &mut *state.to_inner::().unwrap() }.into() } #[pg_extern(name = "into_array", immutable, parallel_safe)] pub fn min_n_time_to_array(agg: MinTimes<'static>) -> Vec { agg.values .clone() 
.into_iter() .map(crate::raw::TimestampTz::from) .collect() } #[pg_extern(name = "into_values", immutable, parallel_safe)] pub fn min_n_time_to_values( agg: MinTimes<'static>, ) -> SetOfIterator<'static, crate::raw::TimestampTz> { SetOfIterator::new( agg.values .clone() .into_iter() .map(crate::raw::TimestampTz::from), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_min_time_into_values( agg: MinTimes<'static>, _accessor: AccessorIntoValues, ) -> SetOfIterator<'static, crate::raw::TimestampTz> { min_n_time_to_values(agg) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_min_time_into_array( agg: MinTimes<'static>, _accessor: AccessorIntoArray, ) -> Vec { min_n_time_to_array(agg) } extension_sql!( "\n\ CREATE AGGREGATE min_n(\n\ value timestamptz, capacity bigint\n\ ) (\n\ sfunc = min_n_time_trans,\n\ stype = internal,\n\ combinefunc = min_n_time_combine,\n\ parallel = safe,\n\ serialfunc = min_n_time_serialize,\n\ deserialfunc = min_n_time_deserialize,\n\ finalfunc = min_n_time_final\n\ );\n\ ", name = "min_n_time", requires = [ min_n_time_trans, min_n_time_final, min_n_time_combine, min_n_time_serialize, min_n_time_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ value MinTimes\n\ ) (\n\ sfunc = min_n_time_rollup_trans,\n\ stype = internal,\n\ combinefunc = min_n_time_combine,\n\ parallel = safe,\n\ serialfunc = min_n_time_serialize,\n\ deserialfunc = min_n_time_deserialize,\n\ finalfunc = min_n_time_final\n\ );\n\ ", name = "min_n_time_rollup", requires = [ min_n_time_rollup_trans, min_n_time_final, min_n_time_combine, min_n_time_serialize, min_n_time_deserialize ], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] fn min_time_correctness() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); client .update( "CREATE TABLE data(val TIMESTAMPTZ, category INT)", None, &[], ) .unwrap(); for i in 0..100 
{ let i = (i * 83) % 100; // mess with the ordering just a little client.update( &format!("INSERT INTO data VALUES ('2020-1-1 UTC'::timestamptz + {} * '1d'::interval, {})", i, i % 4), None, &[] ).unwrap(); } // Test into_array let result = client .update( "SELECT into_array(min_n(val, 5))::TEXT from data", None, &[], ) .unwrap() .first() .get_one::<&str>() .unwrap(); assert_eq!(result.unwrap(), "{\"2020-01-01 00:00:00+00\",\"2020-01-02 00:00:00+00\",\"2020-01-03 00:00:00+00\",\"2020-01-04 00:00:00+00\",\"2020-01-05 00:00:00+00\"}"); let result = client .update( "SELECT (min_n(val, 5)->into_array())::TEXT from data", None, &[], ) .unwrap() .first() .get_one::<&str>() .unwrap(); assert_eq!(result.unwrap(), "{\"2020-01-01 00:00:00+00\",\"2020-01-02 00:00:00+00\",\"2020-01-03 00:00:00+00\",\"2020-01-04 00:00:00+00\",\"2020-01-05 00:00:00+00\"}"); // Test into_values let mut result = client .update( "SELECT into_values(min_n(val, 3))::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-01-01 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-01-02 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-01-03 00:00:00+00") ); assert!(result.next().is_none()); let mut result = client .update( "SELECT (min_n(val, 3)->into_values())::TEXT from data", None, &[], ) .unwrap(); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-01-01 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-01-02 00:00:00+00") ); assert_eq!( result.next().unwrap()[1].value().unwrap(), Some("2020-01-03 00:00:00+00") ); assert!(result.next().is_none()); // Test rollup let result = client.update( "WITH aggs as (SELECT category, min_n(val, 5) as agg from data GROUP BY category) SELECT into_array(rollup(agg))::TEXT FROM aggs", None, &[], ).unwrap().first().get_one::<&str>().unwrap(); assert_eq!(result.unwrap(), "{\"2020-01-01 
00:00:00+00\",\"2020-01-02 00:00:00+00\",\"2020-01-03 00:00:00+00\",\"2020-01-04 00:00:00+00\",\"2020-01-05 00:00:00+00\"}"); }) } } ================================================ FILE: extension/src/nmost.rs ================================================ use pgrx::*; use serde::{Deserialize, Serialize}; use crate::{ aggregate_utils::in_aggregate_context, datum_utils::{deep_copy_datum, free_datum, DatumStore}, palloc::{Inner, Internal, InternalAsValue}, }; use std::collections::BinaryHeap; mod max_float; mod max_int; mod max_time; mod min_float; mod min_int; mod min_time; mod max_by_float; mod max_by_int; mod max_by_time; mod min_by_float; mod min_by_int; mod min_by_time; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct NMostTransState { capacity: usize, heap: BinaryHeap, } impl NMostTransState { fn new(capacity: usize, first_val: T) -> NMostTransState { let mut new_heap = NMostTransState { capacity, heap: BinaryHeap::with_capacity(capacity), }; new_heap.new_entry(first_val); new_heap } fn new_entry(&mut self, new_val: T) { // If at capacity see if we need to replace something if self.heap.len() == self.capacity { if !self.belongs_in_heap(&new_val) { return; } self.heap.pop(); } self.heap.push(new_val) } fn belongs_in_heap(&self, val: &T) -> bool { // Note that this will actually be '>' if T is a Reverse<...> type val < self.heap.peek().unwrap() } } impl From<(&[T], usize)> for NMostTransState { fn from(input: (&[T], usize)) -> Self { let (vals, capacity) = input; let mut state = Self::new(capacity, vals[0]); for val in vals[1..].iter() { state.new_entry(*val); } state } } fn nmost_trans_function( state: Option>>, val: T, capacity: usize, fcinfo: pg_sys::FunctionCallInfo, ) -> Option>> { unsafe { in_aggregate_context(fcinfo, || { if state.is_none() { return Internal::new(NMostTransState::::new(capacity, val)).to_inner(); } let mut state = state.unwrap(); state.new_entry(val); Some(state) }) } } fn nmost_rollup_trans_function( state: Option>>, 
sorted_vals: &[T], capacity: usize, fcinfo: pg_sys::FunctionCallInfo, ) -> Option>> { unsafe { in_aggregate_context(fcinfo, || { if let Some(mut state) = state { for val in sorted_vals { // The values are sorted, so as soon as we find one that shouldn't be added, we're done if !state.belongs_in_heap(val) { return Some(state); } state.new_entry(*val); } Some(state) } else { Internal::new::>((sorted_vals, capacity).into()).to_inner() } }) } } fn nmost_trans_combine( first: Option>>, second: Option>>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option>> { unsafe { in_aggregate_context(fcinfo, || { match (first, second) { (None, None) => None, (None, Some(only)) | (Some(only), None) => Internal::new(only.clone()).to_inner(), (Some(a), Some(b)) => { let mut a = a.clone(); // This could be made more efficient by iterating in the appropriate order with an early exit, but would requiring ordering the other heap for entry in b.heap.iter() { a.new_entry(*entry); } Internal::new(a).to_inner() } } }) } } // TODO: serialize and deserialize will need to be implemented with Datum handling code #[derive(Clone, Debug)] pub struct NMostByTransState { values: NMostTransState<(T, usize)>, data: Vec, oid: pg_sys::Oid, } impl NMostByTransState { fn new(capacity: usize, first_val: T, first_element: pgrx::AnyElement) -> NMostByTransState { // first entry will always have index 0 let first_val = (first_val, 0); NMostByTransState { values: NMostTransState::new(capacity, first_val), data: vec![unsafe { deep_copy_datum(first_element.datum(), first_element.oid()) }], oid: first_element.oid(), } } fn new_entry(&mut self, new_val: T, new_element: pgrx::AnyElement) { assert!(new_element.oid() == self.oid); if self.data.len() < self.values.capacity { // Not yet full, easy case self.values.new_entry((new_val, self.data.len())); self.data .push(unsafe { deep_copy_datum(new_element.datum(), new_element.oid()) }); } else if self .values .belongs_in_heap(&(new_val.clone(), self.data.len())) { // Full and 
value belongs in the heap (using len() for this check just keeps us from // succeeding if we tie the max heap element) let (_, index_to_replace) = *self .values .heap .peek() .expect("Can't be empty in this case"); let old_datum = std::mem::replace(&mut self.data[index_to_replace], unsafe { deep_copy_datum(new_element.datum(), new_element.oid()) }); unsafe { free_datum(old_datum, new_element.oid()) }; self.values.new_entry((new_val, index_to_replace)); } } // Sort the trans state and break it into a tuple of (capacity, values array, datum_store) fn into_sorted_parts(self) -> (usize, Vec, DatumStore<'static>) { let values = self.values; let heap = values.heap; let (val_ary, idx_ary): (Vec, Vec) = heap.into_sorted_vec().into_iter().unzip(); let mut mapped_data = vec![]; for i in idx_ary { mapped_data.push(self.data[i]); } ( values.capacity, val_ary, DatumStore::from((self.oid, mapped_data)), ) } } impl From<(&[T], &DatumStore<'_>, usize)> for NMostByTransState { fn from(in_tuple: (&[T], &DatumStore, usize)) -> Self { let (vals, data, capacity) = in_tuple; let mut elements = data.clone().into_anyelement_iter(); let mut state = Self::new(capacity, vals[0], elements.next().unwrap()); for val in vals[1..].iter() { state.new_entry(*val, elements.next().unwrap()); } state } } fn nmost_by_trans_function( state: Option>>, val: T, data: pgrx::AnyElement, capacity: usize, fcinfo: pg_sys::FunctionCallInfo, ) -> Option>> { unsafe { in_aggregate_context(fcinfo, || { if state.is_none() { return Internal::new(NMostByTransState::::new(capacity, val, data)).to_inner(); } let mut state = state.unwrap(); state.new_entry(val, data); Some(state) }) } } fn nmost_by_rollup_trans_function( state: Option>>, sorted_vals: &[T], datum_store: &DatumStore, capacity: usize, fcinfo: pg_sys::FunctionCallInfo, ) -> Option>> { unsafe { in_aggregate_context(fcinfo, || { if let Some(mut state) = state { for (val, element) in sorted_vals .iter() .zip(datum_store.clone().into_anyelement_iter()) { // The 
values are sorted, so as soon as we find one that shouldn't be added, we're done if !state.values.belongs_in_heap(&(*val, state.values.capacity)) { return Some(state); } state.new_entry(*val, element); } Some(state) } else { Internal::new::>((sorted_vals, datum_store, capacity).into()) .to_inner() } }) } } ================================================ FILE: extension/src/palloc.rs ================================================ use std::{ alloc::{GlobalAlloc, Layout, System}, ops::{Deref, DerefMut}, ptr::NonNull, }; use pgrx::*; pub unsafe fn in_memory_context T>(mctx: pg_sys::MemoryContext, f: F) -> T { let prev_ctx = pg_sys::CurrentMemoryContext; pg_sys::CurrentMemoryContext = mctx; let t = f(); pg_sys::CurrentMemoryContext = prev_ctx; t } pub use pgrx::Internal; /// Extension trait to translate postgres-understood `pgrx::Internal` type into /// the well-typed pointer type `Option>`. /// /// # Safety /// /// This trait should only ever be implemented for `pgrx::Internal` /// There is an lifetime constraint on the returned pointer, though this is /// currently implicit. pub unsafe trait InternalAsValue { // unsafe fn value_or T>(&mut self) -> &mut T; unsafe fn to_inner(self) -> Option>; } unsafe impl InternalAsValue for Internal { // unsafe fn value_or T>(&mut self, f: F) -> &mut T { // if let Some(t) = self.get_mut() { // t // } // *self = Internal::new(f()); // self.get_mut().unwrap() // } unsafe fn to_inner(self) -> Option> { self.unwrap() .map(|p| Inner(NonNull::new(p.cast_mut_ptr()).unwrap())) } } /// Extension trait to turn the typed pointers `Inner<...>` and /// `Option>` into the postgres-understood `pgrx::Internal` type. /// /// # Safety /// The value input must live as long as postgres expects. 
TODO more info pub unsafe trait ToInternal { fn internal(self) -> Option; } pub struct Inner(pub NonNull); impl Deref for Inner { type Target = T; fn deref(&self) -> &Self::Target { unsafe { self.0.as_ref() } } } impl DerefMut for Inner { fn deref_mut(&mut self) -> &mut Self::Target { unsafe { self.0.as_mut() } } } unsafe impl ToInternal for Option> { fn internal(self) -> Option { self.map(|p| Internal::from(Some(pg_sys::Datum::from(p.0.as_ptr())))) } } unsafe impl ToInternal for Inner { fn internal(self) -> Option { Some(Internal::from(Some(pg_sys::Datum::from(self.0.as_ptr())))) } } impl From for Inner { fn from(t: T) -> Self { unsafe { Internal::new(t).to_inner().unwrap() } } } // TODO these last two should probably be `unsafe` unsafe impl ToInternal for *mut T { fn internal(self) -> Option { Some(Internal::from(Some(pg_sys::Datum::from(self)))) } } unsafe impl ToInternal for *const T { fn internal(self) -> Option { Some(Internal::from(Some(pg_sys::Datum::from(self)))) } } // By default rust will `abort()` the process when the allocator returns NULL. // Since many systems can't reliably determine when an allocation will cause the // process to run out of memory, and just rely on the OOM killer cleaning up // afterwards, this is acceptable for many workloads. However, `abort()`-ing a // Postgres will restart the database, and since we often run Postgres on // systems which _can_ reliably return NULL on out-of-memory, we would like to // take advantage of this to cleanly shut down a single transaction when we fail // to allocate. Long-term the solution for this likely involves the `oom=panic` // flag[1], but at the time of writing the flag is not yet stable. // // This allocator implements a partial solution for turning out-of-memory into // transaction-rollback instead of process-abort. It is a thin shim over the // System allocator that `panic!()`s when the System allocator returns `NULL`. 
// In the event that still have enough remaining memory to serve the panic, this // will unwind the stack all the way to transaction-rollback. In the event we // don't even have enough memory to handle unwinding this will merely abort the // process with a panic-in-panic instead of a memory-allocation-failure. Under // the assumption that we're more likely to fail due to a few large allocations // rather than a very large number of small allocations, it seems likely that we // will have some memory remaining for unwinding, and that this will reduce the // likelihood of aborts. // // [1] `oom=panic` tracking issue: https://github.com/rust-lang/rust/issues/43596 struct PanickingAllocator; #[global_allocator] static ALLOCATOR: PanickingAllocator = PanickingAllocator; unsafe impl GlobalAlloc for PanickingAllocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { let p = System.alloc(layout); if p.is_null() { panic!("Out of memory") } p } unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { System.dealloc(ptr, layout) } unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { let p = System.alloc_zeroed(layout); if p.is_null() { panic!("Out of memory") } p } unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 { let p = System.realloc(ptr, layout, new_size); if p.is_null() { panic!("Out of memory") } p } } ================================================ FILE: extension/src/pg_any_element.rs ================================================ use std::{ collections::HashMap, hash::{Hash, Hasher}, mem::size_of, }; use pgrx::*; use pg_sys::{Datum, Oid}; use crate::datum_utils::{deep_copy_datum, DatumHashBuilder}; // Unable to implement PartialEq for AnyElement, so creating a local copy pub struct PgAnyElement { datum: Datum, typoid: Oid, } impl PgAnyElement { // pub fn from_datum_clone(datum : Datum, typoid : Oid) -> PgAnyElement { // PgAnyElement { // datum : unsafe{deep_copy_datum(datum, typoid)}, // typoid // } // } pub fn 
deep_copy_datum(&self) -> Datum {
        // Produce an owned deep copy of the wrapped datum (including any
        // out-of-line data) so the copy can outlive the original's memory.
        unsafe { deep_copy_datum(self.datum, self.typoid) }
    }
}

// Two PgAnyElements are equal when they carry the same type Oid AND the
// type's own SQL equality operator reports the two datums equal.
impl PartialEq for PgAnyElement {
    #[allow(clippy::field_reassign_with_default)]
    fn eq(&self, other: &Self) -> bool {
        unsafe {
            if self.typoid != other.typoid {
                false
            } else {
                // TODO JOSH can we avoid the type cache lookup here
                let typ = self.typoid;
                // Ask the type cache for the type's equality-operator
                // function; error out if the type has none.
                let tentry = pg_sys::lookup_type_cache(typ, pg_sys::TYPECACHE_EQ_OPR_FINFO as _);
                let flinfo = if (*tentry).eq_opr_finfo.fn_addr.is_some() {
                    &(*tentry).eq_opr_finfo
                } else {
                    pgrx::error!("no equality function");
                };
                // Manually build a two-argument FunctionCallInfo and invoke
                // the equality function directly.
                // NOTE(review): the generic arguments of `size_of` appear to
                // have been lost in extraction (presumably the
                // FunctionCallInfoBaseData header plus two NullableDatum
                // slots) — confirm against upstream.
                let size = size_of::() + size_of::() * 2;
                let info = pg_sys::palloc0(size) as pg_sys::FunctionCallInfo;
                (*info).flinfo = flinfo as *const pg_sys::FmgrInfo as *mut pg_sys::FmgrInfo;
                (*info).context = std::ptr::null_mut();
                (*info).resultinfo = std::ptr::null_mut();
                // Compare under the type's default collation.
                (*info).fncollation = (*tentry).typcollation;
                (*info).isnull = false;
                (*info).nargs = 2;
                (*info).args.as_mut_slice(2)[0] = pg_sys::NullableDatum {
                    value: self.datum,
                    isnull: false,
                };
                (*info).args.as_mut_slice(2)[1] = pg_sys::NullableDatum {
                    value: other.datum,
                    isnull: false,
                };
                // A nonzero (true) result datum means the values are equal.
                (*(*info).flinfo).fn_addr.unwrap()(info) != Datum::from(0)
            }
        }
    }
}

impl Eq for PgAnyElement {}

impl Hash for PgAnyElement {
    fn hash(&self, state: &mut H) {
        // Feed the raw datum word into the hasher supplied by the map's
        // DatumHashBuilder.
        // NOTE(review): for by-reference types this passes the pointer word;
        // correctness relies on the hasher hashing by the type's value
        // semantics — confirm against DatumHashBuilder.
        self.datum.value().hash(state);
    }
}

// Construct from a raw (datum, type-oid) pair; the datum is NOT copied.
impl From<(Datum, Oid)> for PgAnyElement {
    fn from(other: (Datum, Oid)) -> Self {
        let (datum, typoid) = other;
        PgAnyElement { datum, typoid }
    }
}

// Construct from a pgrx AnyElement; the datum is NOT copied.
impl From for PgAnyElement {
    fn from(other: AnyElement) -> Self {
        PgAnyElement {
            datum: other.datum(),
            typoid: other.oid(),
        }
    }
}

// A HashMap keyed by PgAnyElement, built with the type-aware
// DatumHashBuilder so keys are hashed/compared with the semantics of the
// stored Postgres type.
pub struct PgAnyElementHashMap(pub(crate) HashMap);

impl PgAnyElementHashMap {
    // Build an empty map whose hasher is derived from the key type's Oid and
    // optional collation.
    pub fn new(typoid: Oid, collation: Option) -> Self {
        PgAnyElementHashMap(HashMap::with_hasher(unsafe {
            DatumHashBuilder::from_type_id(typoid, collation)
        }))
    }

    pub(crate) fn with_hasher(hasher: DatumHashBuilder) -> Self {
        PgAnyElementHashMap(HashMap::with_hasher(hasher))
    }

    // Type Oid of the keys, as recorded in the hasher.
    pub fn typoid(&self) -> Oid {
        self.0.hasher().type_id
    }

    // Passthroughs
    pub fn
contains_key(&self, k: &PgAnyElement) -> bool { self.0.contains_key(k) } pub fn get(&self, k: &PgAnyElement) -> Option<&V> { self.0.get(k) } pub fn get_mut(&mut self, k: &PgAnyElement) -> Option<&mut V> { self.0.get_mut(k) } pub(crate) fn hasher(&self) -> &DatumHashBuilder { self.0.hasher() } pub fn insert(&mut self, k: PgAnyElement, v: V) -> Option { self.0.insert(k, v) } pub fn len(&self) -> usize { self.0.len() } pub fn remove(&mut self, k: &PgAnyElement) -> Option { self.0.remove(k) } } ================================================ FILE: extension/src/range.rs ================================================ use counter_agg::range::I64Range; use pgrx::{extension_sql, pg_sys}; use serde::{Deserialize, Serialize}; use std::convert::TryInto; use std::slice; use flat_serialize_macro::flat_serialize; #[allow(non_camel_case_types)] pub type tstzrange = *mut pg_sys::varlena; // Derived from Postgres' range_deserialize: https://github.com/postgres/postgres/blob/27e1f14563cf982f1f4d71e21ef247866662a052/src/backend/utils/adt/rangetypes.c#L1779 // but we modify because we only allow specific types of ranges, namely [) inclusive on left and exclusive on right, as this makes a lot of logic simpler, and allows for a standard way to represent a range. 
// Parse a Postgres `tstzrange` varlena into our internal I64Range form.
// Returns None for the empty range; an infinite (or NULL-flagged) bound maps
// to a None field. Bounds are normalized to the canonical '[)' convention
// used throughout this crate: an exclusive lower bound and an inclusive
// upper bound are each adjusted up by one unit.
#[allow(clippy::missing_safety_doc)]
pub unsafe fn get_range(range: tstzrange) -> Option {
    let range_bytes = get_toasted_bytes(&*range);
    // Skip the 4-byte varlena header plus the 4-byte range type Oid.
    let mut range_bytes = &range_bytes[8..]; // don't care about the Header and Oid
    // The flags byte is stored at the very end of the serialized range.
    let flags = *range_bytes.last().unwrap();
    let mut range = I64Range {
        left: None,
        right: None,
    };
    if flags & RANGE_EMPTY != 0 {
        return None;
    }
    if range_has_lbound(flags) {
        // Lower bound occupies the first 8 bytes after the header/Oid.
        let bytes = range_bytes[..8].try_into().unwrap();
        range_bytes = &range_bytes[8..];
        let mut left = i64::from_ne_bytes(bytes);
        if !lbound_inclusive(flags) {
            // '(' lower bound -> equivalent '[' bound.
            left += 1;
        }
        range.left = Some(left);
    }
    if range_has_rbound(flags) {
        // Upper bound follows the lower bound, when one was present.
        let bytes = range_bytes[..8].try_into().unwrap();
        let mut right = i64::from_ne_bytes(bytes);
        if rbound_inclusive(flags) {
            // ']' upper bound -> equivalent ')' bound.
            right += 1;
        }
        range.right = Some(right);
    }
    Some(range)
}

// Detoast `ptr` and return its full byte image, header included.
// NOTE(review): the extra `pg_detoast_datum_copy` for 1-byte-header values
// presumably normalizes short varlenas into the 4-byte-header layout that
// the caller's fixed offsets assume — confirm.
unsafe fn get_toasted_bytes(ptr: &pg_sys::varlena) -> &[u8] {
    let mut ptr = pg_sys::pg_detoast_datum_packed(ptr as *const _ as *mut _);
    if pgrx::varatt_is_1b(ptr) {
        ptr = pg_sys::pg_detoast_datum_copy(ptr as *const _ as *mut _);
    }
    let data_len = pgrx::varsize_any(ptr);
    slice::from_raw_parts(ptr as *mut u8, data_len)
}

// Range flag bits, mirroring Postgres' rangetypes.h definitions.
const RANGE_EMPTY: u8 = 0x01;
const RANGE_LB_INC: u8 = 0x02;
const RANGE_UB_INC: u8 = 0x04;
const RANGE_LB_INF: u8 = 0x08;
const RANGE_UB_INF: u8 = 0x10;
const RANGE_LB_NULL: u8 = 0x20; // should never be used, but why not.
const RANGE_UB_NULL: u8 = 0x40; // should never be used, but why not.

// A lower-bound value is serialized only when the range is non-empty and the
// bound is neither NULL nor infinite.
fn range_has_lbound(flags: u8) -> bool {
    flags & (RANGE_EMPTY | RANGE_LB_NULL | RANGE_LB_INF) == 0
}

fn lbound_inclusive(flags: u8) -> bool {
    flags & RANGE_LB_INC != 0
}

// Same as range_has_lbound, for the upper bound.
fn range_has_rbound(flags: u8) -> bool {
    (flags) & (RANGE_EMPTY | RANGE_UB_NULL | RANGE_UB_INF) == 0
}

fn rbound_inclusive(flags: u8) -> bool {
    flags & RANGE_UB_INC != 0
}

// `Option<...>` is not suitable for disk storage. `Option<...>` does not have a
// well-defined layout, for instance, an `Option<u32>` takes 8 bytes while an
// `Option<NonZeroU32>` only takes up 4 bytes, despite them both representing
// 32-bit numbers.
Worse from our perspective is that the layouts of these are // not stable and they can change arbitrarily in the future, so an `Option` // as created by rust 1.50 may not have the same bytes as one created by rust // 1.51. `DiskOption<...>` is `repr(C, u64)` and thus does have a well-defined // layout: 8 bytes for the tag-bit determining if it's `None` or `Some` followed // by `size_of::()` bytes in which the type can be stored. // Before stabilization we should probably change the layout to // ``` // flat_serialize! { // is_some: bool, // value: [T; self.is_some as u8], // } // ``` flat_serialize! { #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] struct I64RangeWrapper { is_present: u8, has_left: u8, has_right: u8, padding: [u8; 5], left: i64 if self.is_present == 1 && self.has_left == 1, right: i64 if self.is_present == 1 && self.has_right == 1, } } impl I64RangeWrapper { pub fn to_i64range(&self) -> Option { if self.is_present == 0 { return None; } Some(I64Range { left: self.left, right: self.right, }) } pub fn from_i64range(b: Option) -> Self { match b { Some(range) => Self { is_present: 1, has_left: range.left.is_some().into(), has_right: range.right.is_some().into(), padding: [0; 5], left: range.left, right: range.right, }, None => Self { is_present: 0, has_left: 0, has_right: 0, padding: [0; 5], left: None, right: None, }, } } } // this introduces a timescaledb dependency, but only kind of, extension_sql!("\n\ CREATE FUNCTION toolkit_experimental.time_bucket_range( bucket_width interval, ts timestamptz)\n\ RETURNS tstzrange as $$\n\ SELECT tstzrange(time_bucket(bucket_width, ts), time_bucket(bucket_width, ts + bucket_width), '[)');\n\ $$\n\ LANGUAGE SQL IMMUTABLE PARALLEL SAFE;\n\ ", name = "time_bucket_range", ); ================================================ FILE: extension/src/raw.rs ================================================ #![allow(non_camel_case_types)] use pgrx::*; use pgrx_sql_entity_graph::metadata::{ ArgumentError, Returns, 
ReturnsError, SqlMapping, SqlTranslatable, }; extension_sql!( "\n\ CREATE SCHEMA toolkit_experimental;\n\ ", name = "create_experimental_schema", creates = [ Type(bytea), Type(text), Type(TimestampTz), Type(AnyElement), Type(tstzrange), Type(Interval), Type(regproc) ], bootstrap, ); // TODO temporary holdover types while we migrate from nominal types to actual macro_rules! raw_type { ($name:ident, $tyid: path, $arrayid: path) => { impl FromDatum for $name { unsafe fn from_polymorphic_datum( datum: pg_sys::Datum, is_null: bool, _typoid: pg_sys::Oid, ) -> Option where Self: Sized, { if is_null { return None; } Some(Self(datum)) } } impl IntoDatum for $name { fn into_datum(self) -> Option { Some(self.0) } fn type_oid() -> pg_sys::Oid { $tyid } } impl From for $name { fn from(d: pg_sys::Datum) -> Self { Self(d) } } impl From<$name> for pg_sys::Datum { fn from(v: $name) -> Self { v.0 } } // SAFETY: all calls to raw_type! use type names that are valid SQL unsafe impl SqlTranslatable for $name { fn argument_sql() -> Result { Ok(SqlMapping::literal(stringify!($name))) } fn return_sql() -> Result { Ok(Returns::One(SqlMapping::literal(stringify!($name)))) } } unsafe impl<'fcx> callconv::ArgAbi<'fcx> for $name { unsafe fn unbox_arg_unchecked(arg: callconv::Arg<'_, 'fcx>) -> Self { let index = arg.index(); unsafe { arg.unbox_arg_using_from_datum() .unwrap_or_else(|| panic!("argument {index} must not be null")) } } unsafe fn unbox_nullable_arg(arg: callconv::Arg<'_, 'fcx>) -> nullable::Nullable { unsafe { arg.unbox_arg_using_from_datum().into() } } } }; } #[derive(Clone, Copy)] pub struct bytea(pub pg_sys::Datum); raw_type!(bytea, pg_sys::BYTEAOID, pg_sys::BYTEAARRAYOID); unsafe impl pgrx::callconv::BoxRet for bytea { unsafe fn box_into<'fcx>( self, fcinfo: &mut pgrx::callconv::FcInfo<'fcx>, ) -> pgrx::datum::Datum<'fcx> { unsafe { fcinfo.return_raw_datum(self.0) } } } #[derive(Clone, Copy)] pub struct text(pub pg_sys::Datum); raw_type!(text, pg_sys::TEXTOID, 
pg_sys::TEXTARRAYOID); pub struct TimestampTz(pub pg_sys::Datum); raw_type!( TimestampTz, pg_sys::TIMESTAMPTZOID, pg_sys::TIMESTAMPTZARRAYOID ); unsafe impl pgrx::callconv::BoxRet for TimestampTz { unsafe fn box_into<'fcx>( self, fcinfo: &mut pgrx::callconv::FcInfo<'fcx>, ) -> pgrx::datum::Datum<'fcx> { unsafe { fcinfo.return_raw_datum(self.0) } } } impl From for pg_sys::TimestampTz { fn from(tstz: TimestampTz) -> Self { tstz.0.value() as _ } } impl From for TimestampTz { fn from(ts: pg_sys::TimestampTz) -> Self { Self(pg_sys::Datum::from(ts)) } } pub struct AnyElement(pub pg_sys::Datum); raw_type!(AnyElement, pg_sys::ANYELEMENTOID, pg_sys::ANYARRAYOID); pub struct tstzrange(pub pg_sys::Datum); raw_type!(tstzrange, pg_sys::TSTZRANGEOID, pg_sys::TSTZRANGEARRAYOID); pub struct Interval(pub pg_sys::Datum); raw_type!(Interval, pg_sys::INTERVALOID, pg_sys::INTERVALARRAYOID); unsafe impl pgrx::callconv::BoxRet for Interval { unsafe fn box_into<'fcx>( self, fcinfo: &mut pgrx::callconv::FcInfo<'fcx>, ) -> pgrx::datum::Datum<'fcx> { unsafe { fcinfo.return_raw_datum(self.0) } } } impl From for Interval { fn from(interval: i64) -> Self { let interval = pg_sys::Interval { time: interval, ..Default::default() }; let interval = unsafe { let ptr = pg_sys::palloc(std::mem::size_of::()) as *mut pg_sys::Interval; *ptr = interval; Interval(pg_sys::Datum::from(ptr)) }; // Now we have a valid Interval in at least one sense. But we have the // microseconds in the `time` field and `day` and `month` are both 0, // which is legal. However, directly converting one of these to TEXT // comes out quite ugly if the number of microseconds is greater than 1 day: // 8760:02:00 // Should be: // 365 days 00:02:00 // How does postgresql do it? 
It happens in src/backend/utils/adt/timestamp.c:timestamp_mi: // result->time = dt1 - dt2; // result = DatumGetIntervalP(DirectFunctionCall1(interval_justify_hours, // IntervalPGetDatum(result))); // So if we want the same behavior, we need to call interval_justify_hours too: let function_args = vec![Some(pg_sys::Datum::from(interval))]; unsafe { pgrx::direct_function_call(pg_sys::interval_justify_hours, &function_args) } .expect("interval_justify_hours does not return None") } } pub struct regproc(pub pg_sys::Datum); raw_type!(regproc, pg_sys::REGPROCOID, pg_sys::REGPROCARRAYOID); ================================================ FILE: extension/src/saturation.rs ================================================ //! Saturating Math for Integers use pgrx::*; /// Computes x+y, saturating at the numeric bounds instead of overflowing #[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)] fn saturating_add(x: i32, y: i32) -> i32 { x.saturating_add(y) } /// Computes x+y, saturating at 0 for the minimum bound instead of i32::MIN #[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)] fn saturating_add_pos(x: i32, y: i32) -> i32 { // check to see if abs of y is greater than the abs of x? let result = x.saturating_add(y); if result > 0 { result } else { 0 } } /// Computes x-y, saturating at the numeric bounds instead of overflowing. 
#[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)] fn saturating_sub(x: i32, y: i32) -> i32 { x.saturating_sub(y) } /// Computes x-y, saturating at 0 for the minimum bound instead of i32::MIN #[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)] fn saturating_sub_pos(x: i32, y: i32) -> i32 { if y > x { 0 } else { x.saturating_sub(y) } } /// Computes x*y, saturating at the numeric bounds instead of overflowing #[pg_extern(schema = "toolkit_experimental", immutable, parallel_safe)] fn saturating_mul(x: i32, y: i32) -> i32 { x.saturating_mul(y) } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_add_max() { assert_eq!(i32::MAX, saturating_add(i32::MAX, 100)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_add_min() { assert_eq!(i32::MIN, saturating_add(i32::MIN, -100)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_add_pos() { assert_eq!(0, saturating_add_pos(200, -350)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_sub_max() { assert_eq!(i32::MAX, saturating_sub(i32::MAX, -10)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_sub_min() { assert_eq!(i32::MIN, saturating_sub(i32::MIN, 10)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_sub_pos() { assert_eq!(0, saturating_sub_pos(i32::MIN, 10)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_mul_max() { assert_eq!(i32::MAX, saturating_mul(i32::MAX, 2)); } #[pg_test] #[allow(arithmetic_overflow)] fn test_saturating_mul_min() { assert_eq!(i32::MIN, saturating_mul(i32::MAX, -2)); } } ================================================ FILE: extension/src/serialization/collations.rs ================================================ use std::{ ffi::{CStr, CString}, mem::{align_of, size_of, MaybeUninit}, os::raw::c_char, slice, }; use flat_serialize::{impl_flat_serializable, 
FlatSerializable, WrapErr}; use serde::{Deserialize, Serialize}; use once_cell::sync::Lazy; use pg_sys::Oid; use pgrx::*; // TODO short collation serializer? /// `PgCollationId` provides provides the ability to serialize and deserialize /// collation Oids as `(namespace, name)` pairs. #[derive(Debug, Clone, Copy)] #[repr(transparent)] pub struct PgCollationId(pub Oid); impl_flat_serializable!(PgCollationId); impl PgCollationId { pub fn is_invalid(&self) -> bool { self.0 == pg_sys::InvalidOid } pub fn to_option_oid(self) -> Option { if self.is_invalid() { None } else { Some(self.0) } } } #[allow(non_upper_case_globals)] const Anum_pg_collation_oid: u32 = 1; // https://github.com/postgres/postgres/blob/e955bd4b6c2bcdbd253837f6cf4c7520b98e69d4/src/include/catalog/pg_collation.dat #[allow(deprecated)] // From::from is non-const pub(crate) const DEFAULT_COLLATION_OID: Oid = unsafe { Oid::from_u32_unchecked(100) }; #[allow(non_camel_case_types)] #[derive(Copy, Clone)] #[repr(C)] struct FormData_pg_collation { oid: pg_sys::Oid, collname: pg_sys::NameData, collnamespace: pg_sys::Oid, collowner: pg_sys::Oid, collprovider: c_char, collisdeterministic: bool, collencoding: i32, collcollate: pg_sys::NameData, collctype: pg_sys::NameData, } #[allow(non_camel_case_types)] type Form_pg_collation = *mut FormData_pg_collation; #[allow(non_camel_case_types)] #[derive(Copy, Clone)] #[repr(C)] struct FormData_pg_database { oid: Oid, datname: pg_sys::NameData, datdba: Oid, encoding: i32, datcollate: pg_sys::NameData, // TODO more fields I'm ignoring } #[allow(non_camel_case_types)] type Form_pg_database = *mut FormData_pg_database; static DEFAULT_COLLATION_NAME: Lazy = Lazy::new(|| unsafe { let tuple = pg_sys::SearchSysCache1( pg_sys::SysCacheIdentifier::DATABASEOID as _, pg_sys::Datum::from(pg_sys::MyDatabaseId), ); if tuple.is_null() { pgrx::error!("no database info"); } let database_tuple: Form_pg_database = get_struct(tuple); let collation_name = 
(*database_tuple).datcollate.data.as_ptr(); let collation_name_len = CStr::from_ptr(collation_name).to_bytes().len(); let collation_name = pg_sys::pg_server_to_any( collation_name, collation_name_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let collation_name = CStr::from_ptr(collation_name); pg_sys::ReleaseSysCache(tuple); CString::from(collation_name) }); impl Serialize for PgCollationId { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { unsafe { let mut layout: Option<(&str, &str)> = None; if self.is_invalid() { return layout.serialize(serializer); } let tuple = pg_sys::SearchSysCache1( pg_sys::SysCacheIdentifier::COLLOID as _, pg_sys::Datum::from(self.0), ); if tuple.is_null() { pgrx::error!("no collation info for oid {}", self.0.to_u32()); } let collation_tuple: Form_pg_collation = get_struct(tuple); let namespace = pg_sys::get_namespace_name((*collation_tuple).collnamespace); if namespace.is_null() { pgrx::error!( "invalid schema oid {}", (*collation_tuple).collnamespace.to_u32() ); } let namespace_len = CStr::from_ptr(namespace).to_bytes().len(); let namespace = pg_sys::pg_server_to_any( namespace, namespace_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let namespace = CStr::from_ptr(namespace); let namespace = namespace.to_str().unwrap(); // the 'default' collation isn't really a collation, and we need to // look in pg_database to discover what real collation is let collation_name = if self.0 == DEFAULT_COLLATION_OID { &*DEFAULT_COLLATION_NAME } else { let collation_name = (*collation_tuple).collname.data.as_ptr(); let collation_name_len = CStr::from_ptr(collation_name).to_bytes().len(); let collation_name = pg_sys::pg_server_to_any( collation_name, collation_name_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); CStr::from_ptr(collation_name) }; let collation_name = collation_name.to_str().unwrap(); let qualified_name: (&str, &str) = (namespace, collation_name); layout = Some(qualified_name); let res = layout.serialize(serializer); 
pg_sys::ReleaseSysCache(tuple); res } } } impl<'de> Deserialize<'de> for PgCollationId { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { use serde::de::Error; let collation = >::deserialize(deserializer)?; let (namespace, name) = match collation { None => return Ok(Self(pg_sys::Oid::INVALID)), Some(qualified_name) => qualified_name, }; let (namespace, name) = ( CString::new(namespace).unwrap(), CString::new(name).unwrap(), ); let (namespace_len, name_len) = (namespace.to_bytes().len(), name.to_bytes().len()); unsafe { let namespace = pg_sys::pg_any_to_server( namespace.as_ptr(), namespace_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let namespace = CStr::from_ptr(namespace); let name = pg_sys::pg_any_to_server( name.as_ptr(), name_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let name = CStr::from_ptr(name); let namespace_id = pg_sys::LookupExplicitNamespace(namespace.as_ptr(), true); if namespace_id == pg_sys::InvalidOid { return Err(D::Error::custom(format!("invalid namespace {namespace:?}"))); } // COLLNAMEENCNSP is based on a triple `(collname, collencoding, collnamespace)`, // however, `(collname, collnamespace)` is enough to uniquely determine // a collation, though we need to check both collencoding = -1 and // collencoding = DatabaseEncoding // see: // https://www.postgresql.org/docs/13/catalog-pg-collation.html // https://github.com/postgres/postgres/blob/e955bd4b6c2bcdbd253837f6cf4c7520b98e69d4/src/backend/commands/collationcmds.c#L246 let mut collation_id = pg_sys::GetSysCacheOid( pg_sys::SysCacheIdentifier::COLLNAMEENCNSP as _, Anum_pg_collation_oid as _, pg_sys::Datum::from(name.as_ptr()), pg_sys::Datum::from(pg_sys::GetDatabaseEncoding()), pg_sys::Datum::from(namespace_id), pg_sys::Datum::from(0), //unused ); if collation_id == pg_sys::InvalidOid { collation_id = pg_sys::GetSysCacheOid( pg_sys::SysCacheIdentifier::COLLNAMEENCNSP as _, Anum_pg_collation_oid as _, pg_sys::Datum::from(name.as_ptr()), 
pg_sys::Datum::from((-1isize) as usize), pg_sys::Datum::from(namespace_id), pg_sys::Datum::from(0), //unused ); } if collation_id == pg_sys::InvalidOid { // The default collation doesn't necessarily exist in the // collations catalog, so check that specially if name == &**DEFAULT_COLLATION_NAME { return Ok(PgCollationId(DEFAULT_COLLATION_OID)); } return Err(D::Error::custom(format!( "invalid collation {namespace:?}.{name:?}" ))); } Ok(PgCollationId(collation_id)) } } } unsafe fn get_struct(tuple: pg_sys::HeapTuple) -> *mut T { //((char *) ((TUP)->t_data) + (TUP)->t_data->t_hoff) let t_data: *mut u8 = (*tuple).t_data.cast(); let t_hoff = (*(*tuple).t_data).t_hoff; t_data.add(t_hoff as usize).cast() } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::PgCollationId; use pgrx::{pg_sys, pg_test}; #[allow(deprecated)] // no const version const COLLATION_ID_950: PgCollationId = PgCollationId(unsafe { pg_sys::Oid::from_u32_unchecked(950) }); #[allow(deprecated)] // no const version const COLLATION_ID_951: PgCollationId = PgCollationId(unsafe { pg_sys::Oid::from_u32_unchecked(951) }); // TODO is there a way we can test more of this without making it flaky? 
#[pg_test] fn test_pg_collation_id_serialize_default_collation_ron() { let serialized = ron::to_string(&PgCollationId( crate::serialization::collations::DEFAULT_COLLATION_OID, )) .unwrap(); let deserialized: PgCollationId = ron::from_str(&serialized).unwrap(); assert_ne!(deserialized.0, pg_sys::Oid::INVALID); let serialized = ron::to_string(&PgCollationId( crate::serialization::collations::DEFAULT_COLLATION_OID, )) .unwrap(); let deserialized2: PgCollationId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized2.0, deserialized.0); } #[pg_test] fn test_pg_collation_id_serialize_c_collation() { let serialized = bincode::serialize(&COLLATION_ID_950).unwrap(); assert_eq!( serialized, vec![ 1, 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 1, 0, 0, 0, 0, 0, 0, 0, 67 ] ); let deserialized: PgCollationId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, COLLATION_ID_950.0); } // TODO this test may be too flaky depending on what the default collation actually is #[pg_test] fn test_pg_collation_id_serialize_c_collation_ron() { let serialized = ron::to_string(&COLLATION_ID_950).unwrap(); assert_eq!(&*serialized, "Some((\"pg_catalog\",\"C\"))",); let deserialized: PgCollationId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, COLLATION_ID_950.0); } #[pg_test] fn test_pg_collation_id_serialize_posix_collation() { let serialized = bincode::serialize(&COLLATION_ID_951).unwrap(); assert_eq!( serialized, vec![ 1, 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 5, 0, 0, 0, 0, 0, 0, 0, 80, 79, 83, 73, 88 ] ); let deserialized: PgCollationId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, COLLATION_ID_951.0); } // TODO this test may be too flaky depending on what the default collation actually is #[pg_test] fn test_pg_collation_id_serialize_posix_collation_ron() { let serialized = ron::to_string(&COLLATION_ID_951).unwrap(); assert_eq!(&*serialized, 
"Some((\"pg_catalog\",\"POSIX\"))",); let deserialized: PgCollationId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, COLLATION_ID_951.0); } } ================================================ FILE: extension/src/serialization/functions.rs ================================================ use std::{ ffi::{CStr, CString}, mem::{align_of, size_of, MaybeUninit}, os::raw::c_char, slice, }; use flat_serialize::{impl_flat_serializable, FlatSerializable, WrapErr}; use serde::{Deserialize, Serialize}; use pg_sys::{Datum, Oid}; use pgrx::*; /// `PgProcId` provides provides the ability to serialize and deserialize /// regprocedures as `namespace.name(args)` #[derive(Debug, Clone, Copy)] #[repr(transparent)] pub struct PgProcId(pub Oid); impl_flat_serializable!(PgProcId); // FIXME upstream to pgrx // TODO use this or regprocedureout()? unsafe extern "C" { pub fn format_procedure_qualified(procedure_oid: pg_sys::Oid) -> *const c_char; } impl Serialize for PgProcId { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { unsafe { let qualified_name = format_procedure_qualified(self.0); let len = CStr::from_ptr(qualified_name).to_bytes().len(); let qualified_name = pg_sys::pg_server_to_any(qualified_name, len as _, pg_sys::pg_enc::PG_UTF8 as _); let qualified_name = CStr::from_ptr(qualified_name); let qualified_name = qualified_name.to_str().unwrap(); qualified_name.serialize(serializer) } } } impl<'de> Deserialize<'de> for PgProcId { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { // FIXME pgrx wraps all functions in rust wrappers, which makes them // uncallable with DirectFunctionCall(). Is there a way to // export both? 
unsafe extern "C-unwind" { #[allow(improper_ctypes)] fn regprocedurein(fcinfo: pg_sys::FunctionCallInfo) -> Datum; } let qualified_name = <&str>::deserialize(deserializer)?; let qualified_name = CString::new(qualified_name).unwrap(); let oid = unsafe { pg_sys::DirectFunctionCall1Coll( Some(regprocedurein), pg_sys::InvalidOid, pg_sys::Datum::from(qualified_name.as_ptr()), ) }; Ok(Self(Oid::from(oid.value() as u32))) } } ================================================ FILE: extension/src/serialization/types.rs ================================================ use std::{ ffi::{CStr, CString}, mem::{align_of, size_of, MaybeUninit}, slice, }; use flat_serialize::{impl_flat_serializable, FlatSerializable, WrapErr}; use serde::{Deserialize, Serialize}; use pg_sys::Oid; use pgrx::*; /// Possibly a premature optimization, `ShortTypId` provides the ability to /// serialize and deserialize type Oids as `(namespace, name)` pairs, special /// casing a number of types with hardcoded Oids that we expect to be common so /// that these types can be stored more compactly if desired. 
#[derive(Debug, Clone, Copy)] #[repr(transparent)] pub struct ShortTypeId(pub Oid); impl_flat_serializable!(ShortTypeId); impl From for ShortTypeId { fn from(id: Oid) -> Self { Self(id) } } impl From for Oid { fn from(id: ShortTypeId) -> Self { id.0 } } impl Serialize for ShortTypeId { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { ShortTypIdSerializer::from_oid(self.0).serialize(serializer) } } impl<'de> Deserialize<'de> for ShortTypeId { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let type_id = ShortTypIdSerializer::deserialize(deserializer)?; Ok(Self(type_id.to_oid())) } } #[derive(Debug, Serialize, Deserialize)] #[allow(clippy::upper_case_acronyms)] enum ShortTypIdSerializer { BOOL, BYTEA, CHAR, NAME, INT8, INT2, INT2VECTOR, INT4, REGPROC, TEXT, JSON, XML, POINT, FLOAT4, FLOAT8, MACADDR8, VARCHAR, DATE, TIME, TIMESTAMP, TIMESTAMPTZ, INTERVAL, TIMETZ, JSONB, BOOLARRAY, BYTEAARRAY, CHARARRAY, NAMEARRAY, INT8ARRAY, INT2ARRAY, INT4ARRAY, TEXTARRAY, FLOAT4ARRAY, FLOAT8ARRAY, DATEARRAY, TIMEARRAY, TIMESTAMPARRAY, TIMESTAMPTZARRAY, INTERVALARRAY, TIMETZARRAY, NUMERICARRAY, JSONBARRAY, #[serde(rename = "Type")] Other(PgTypId), } impl ShortTypIdSerializer { pub fn from_oid(oid: Oid) -> Self { use ShortTypIdSerializer::*; match oid { pg_sys::BOOLOID => BOOL, pg_sys::BYTEAOID => BYTEA, pg_sys::CHAROID => CHAR, pg_sys::NAMEOID => NAME, pg_sys::INT8OID => INT8, pg_sys::INT2OID => INT2, pg_sys::INT2VECTOROID => INT2VECTOR, pg_sys::INT4OID => INT4, pg_sys::REGPROCOID => REGPROC, pg_sys::TEXTOID => TEXT, pg_sys::JSONOID => JSON, pg_sys::XMLOID => XML, pg_sys::POINTOID => POINT, pg_sys::FLOAT4OID => FLOAT4, pg_sys::FLOAT8OID => FLOAT8, pg_sys::MACADDR8OID => MACADDR8, pg_sys::VARCHAROID => VARCHAR, pg_sys::DATEOID => DATE, pg_sys::TIMEOID => TIME, pg_sys::TIMESTAMPOID => TIMESTAMP, pg_sys::TIMESTAMPTZOID => TIMESTAMPTZ, pg_sys::INTERVALOID => INTERVAL, pg_sys::TIMETZOID => TIMETZ, pg_sys::JSONBOID => JSONB, 
pg_sys::BOOLARRAYOID => BOOLARRAY, pg_sys::BYTEAARRAYOID => BYTEAARRAY, pg_sys::CHARARRAYOID => CHARARRAY, pg_sys::NAMEARRAYOID => NAMEARRAY, pg_sys::INT8ARRAYOID => INT8ARRAY, pg_sys::INT2ARRAYOID => INT2ARRAY, pg_sys::INT4ARRAYOID => INT4ARRAY, pg_sys::TEXTARRAYOID => TEXTARRAY, pg_sys::FLOAT4ARRAYOID => FLOAT4ARRAY, pg_sys::FLOAT8ARRAYOID => FLOAT8ARRAY, pg_sys::DATEARRAYOID => DATEARRAY, pg_sys::TIMEARRAYOID => TIMEARRAY, pg_sys::TIMESTAMPARRAYOID => TIMESTAMPARRAY, pg_sys::TIMESTAMPTZARRAYOID => TIMESTAMPTZARRAY, pg_sys::INTERVALARRAYOID => INTERVALARRAY, pg_sys::TIMETZARRAYOID => TIMETZARRAY, pg_sys::NUMERICARRAYOID => NUMERICARRAY, pg_sys::JSONBARRAYOID => JSONBARRAY, other => Other(PgTypId(other)), } } pub fn to_oid(&self) -> Oid { use ShortTypIdSerializer::*; match self { BOOL => pg_sys::BOOLOID, BYTEA => pg_sys::BYTEAOID, CHAR => pg_sys::CHAROID, NAME => pg_sys::NAMEOID, INT8 => pg_sys::INT8OID, INT2 => pg_sys::INT2OID, INT2VECTOR => pg_sys::INT2VECTOROID, INT4 => pg_sys::INT4OID, REGPROC => pg_sys::REGPROCOID, TEXT => pg_sys::TEXTOID, JSON => pg_sys::JSONOID, XML => pg_sys::XMLOID, POINT => pg_sys::POINTOID, FLOAT4 => pg_sys::FLOAT4OID, FLOAT8 => pg_sys::FLOAT8OID, MACADDR8 => pg_sys::MACADDR8OID, VARCHAR => pg_sys::VARCHAROID, DATE => pg_sys::DATEOID, TIME => pg_sys::TIMEOID, TIMESTAMP => pg_sys::TIMESTAMPOID, TIMESTAMPTZ => pg_sys::TIMESTAMPTZOID, INTERVAL => pg_sys::INTERVALOID, TIMETZ => pg_sys::TIMETZOID, JSONB => pg_sys::JSONBOID, BOOLARRAY => pg_sys::BOOLARRAYOID, BYTEAARRAY => pg_sys::BYTEAARRAYOID, CHARARRAY => pg_sys::CHARARRAYOID, NAMEARRAY => pg_sys::NAMEARRAYOID, INT8ARRAY => pg_sys::INT8ARRAYOID, INT2ARRAY => pg_sys::INT2ARRAYOID, INT4ARRAY => pg_sys::INT4ARRAYOID, TEXTARRAY => pg_sys::TEXTARRAYOID, FLOAT4ARRAY => pg_sys::FLOAT4ARRAYOID, FLOAT8ARRAY => pg_sys::FLOAT8ARRAYOID, DATEARRAY => pg_sys::DATEARRAYOID, TIMEARRAY => pg_sys::TIMEARRAYOID, TIMESTAMPARRAY => pg_sys::TIMESTAMPARRAYOID, TIMESTAMPTZARRAY => pg_sys::TIMESTAMPTZARRAYOID, 
INTERVALARRAY => pg_sys::INTERVALARRAYOID, TIMETZARRAY => pg_sys::TIMETZARRAYOID, NUMERICARRAY => pg_sys::NUMERICARRAYOID, JSONBARRAY => pg_sys::JSONBARRAYOID, Other(other) => other.0, } } } /// `PgTypId` provides provides the ability to serialize and deserialize type /// Oids as `(namespace, name)` pairs. #[derive(Debug)] #[repr(transparent)] pub struct PgTypId(pub Oid); impl Serialize for PgTypId { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { unsafe { let tuple = pg_sys::SearchSysCache1( pg_sys::SysCacheIdentifier::TYPEOID as _, pg_sys::Datum::from(self.0), ); if tuple.is_null() { pgrx::error!("no type info for oid {}", self.0.to_u32()); } let type_tuple: pg_sys::Form_pg_type = get_struct(tuple); let namespace = pg_sys::get_namespace_name((*type_tuple).typnamespace); if namespace.is_null() { pgrx::error!("invalid schema oid {}", (*type_tuple).typnamespace.to_u32()); } let namespace_len = CStr::from_ptr(namespace).to_bytes().len(); let namespace = pg_sys::pg_server_to_any( namespace, namespace_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let namespace = CStr::from_ptr(namespace); let namespace = namespace.to_str().unwrap(); let type_name = (*type_tuple).typname.data.as_ptr(); let type_name_len = CStr::from_ptr(type_name).to_bytes().len(); let type_name = pg_sys::pg_server_to_any( type_name, type_name_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let type_name = CStr::from_ptr(type_name); let type_name = type_name.to_str().unwrap(); let qualified_name: (&str, &str) = (namespace, type_name); let res = qualified_name.serialize(serializer); pg_sys::ReleaseSysCache(tuple); res } } } impl<'de> Deserialize<'de> for PgTypId { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { use serde::de::Error; let (namespace, name) = <(&str, &str)>::deserialize(deserializer)?; let (namespace, name) = ( CString::new(namespace).unwrap(), CString::new(name).unwrap(), ); let (namespace_len, name_len) = (namespace.to_bytes().len(), 
name.to_bytes().len()); unsafe { let namespace = pg_sys::pg_any_to_server( namespace.as_ptr(), namespace_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let namespace = CStr::from_ptr(namespace); let name = pg_sys::pg_any_to_server( name.as_ptr(), name_len as _, pg_sys::pg_enc::PG_UTF8 as _, ); let name = CStr::from_ptr(name); let namespace_id = pg_sys::LookupExplicitNamespace(namespace.as_ptr(), true); if namespace_id == pg_sys::InvalidOid { return Err(D::Error::custom(format!("invalid namespace {namespace:?}"))); } let type_id = pg_sys::GetSysCacheOid( pg_sys::SysCacheIdentifier::TYPENAMENSP as _, pg_sys::Anum_pg_type_oid as _, pg_sys::Datum::from(name.as_ptr()), pg_sys::Datum::from(namespace_id), pg_sys::Datum::from(0), //unused pg_sys::Datum::from(0), //unused ); if type_id == pg_sys::InvalidOid { return Err(D::Error::custom(format!( "invalid type {namespace:?}.{name:?}" ))); } Ok(PgTypId(type_id)) } } } unsafe fn get_struct(tuple: pg_sys::HeapTuple) -> *mut T { //((char *) ((TUP)->t_data) + (TUP)->t_data->t_hoff) let t_data: *mut u8 = (*tuple).t_data.cast(); let t_hoff = (*(*tuple).t_data).t_hoff; t_data.add(t_hoff as usize).cast() } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::{PgTypId, ShortTypeId}; use pgrx::{ pg_sys::{BOOLOID, CHAROID, CIRCLEOID}, pg_test, }; #[pg_test] fn test_pg_type_id_serialize_char_type() { let serialized = bincode::serialize(&PgTypId(CHAROID)).unwrap(); assert_eq!( serialized, vec![ 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 4, 0, 0, 0, 0, 0, 0, 0, 99, 104, 97, 114 ] ); let deserialized: PgTypId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, CHAROID); } #[pg_test] fn test_pg_type_id_serialize_char_type_ron() { let serialized = ron::to_string(&PgTypId(CHAROID)).unwrap(); assert_eq!(&*serialized, "(\"pg_catalog\",\"char\")",); let deserialized: PgTypId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, CHAROID); } #[pg_test] fn 
test_pg_type_id_serialize_bool_type() { let serialized = bincode::serialize(&PgTypId(BOOLOID)).unwrap(); assert_eq!( serialized, vec![ 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 4, 0, 0, 0, 0, 0, 0, 0, 98, 111, 111, 108 ] ); let deserialized: PgTypId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, BOOLOID); } #[pg_test] fn test_pg_type_id_serialize_bool_type_ron() { let serialized = ron::to_string(&PgTypId(BOOLOID)).unwrap(); assert_eq!(&*serialized, "(\"pg_catalog\",\"bool\")",); let deserialized: PgTypId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, BOOLOID); } #[pg_test] fn test_short_type_id_serialize_char_type() { let serialized = bincode::serialize(&ShortTypeId(CHAROID)).unwrap(); assert_eq!(serialized, vec![2, 0, 0, 0],); let deserialized: ShortTypeId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, CHAROID); } #[pg_test] fn test_short_type_id_serialize_char_type_ron() { let serialized = ron::to_string(&ShortTypeId(CHAROID)).unwrap(); assert_eq!(&*serialized, "CHAR",); let deserialized: ShortTypeId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, CHAROID); } #[pg_test] fn test_short_type_id_serialize_bool_type() { let serialized = bincode::serialize(&ShortTypeId(BOOLOID)).unwrap(); assert_eq!(serialized, vec![0, 0, 0, 0],); let deserialized: ShortTypeId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, BOOLOID); } #[pg_test] fn test_short_type_id_serialize_bool_type_ron() { let serialized = ron::to_string(&ShortTypeId(BOOLOID)).unwrap(); assert_eq!(&*serialized, "BOOL",); let deserialized: ShortTypeId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, BOOLOID); } #[pg_test] fn test_short_type_id_serialize_circle_type() { let serialized = bincode::serialize(&ShortTypeId(CIRCLEOID)).unwrap(); assert_eq!( serialized, vec![ 42, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 112, 103, 95, 99, 97, 116, 97, 108, 111, 103, 6, 0, 
0, 0, 0, 0, 0, 0, 99, 105, 114, 99, 108, 101 ], ); let deserialized: ShortTypeId = bincode::deserialize(&serialized).unwrap(); assert_eq!(deserialized.0, CIRCLEOID); } #[pg_test] fn test_short_type_id_serialize_circle_type_ron() { let serialized = ron::to_string(&ShortTypeId(CIRCLEOID)).unwrap(); assert_eq!(&*serialized, "Type((\"pg_catalog\",\"circle\"))"); let deserialized: ShortTypeId = ron::from_str(&serialized).unwrap(); assert_eq!(deserialized.0, CIRCLEOID); } } ================================================ FILE: extension/src/serialization.rs ================================================ pub use self::collations::PgCollationId; pub use self::functions::PgProcId; pub use self::types::ShortTypeId; use std::{ convert::TryInto, os::raw::{c_char, c_int}, }; use pgrx::pg_sys::{self}; use std::ffi::CStr; pub(crate) mod collations; mod functions; mod types; // basically timestamptz_out #[unsafe(no_mangle)] pub extern "C" fn _ts_toolkit_encode_timestamptz( dt: pg_sys::TimestampTz, buf: &mut [c_char; pg_sys::MAXDATELEN as _], ) { let mut tz: c_int = 0; let mut tt: pg_sys::pg_tm = unsafe { std::mem::MaybeUninit::zeroed().assume_init() }; let mut fsec = 0; let mut tzn = std::ptr::null(); unsafe { if dt == pg_sys::TimestampTz::MAX || dt == pg_sys::TimestampTz::MIN { return pg_sys::EncodeSpecialTimestamp(dt, buf.as_mut_ptr()); } let err = pg_sys::timestamp2tm( dt, &mut tz, &mut tt, &mut fsec, &mut tzn, std::ptr::null_mut(), ); if err != 0 { panic!("timestamp out of range") } pg_sys::EncodeDateTime( &mut tt, fsec, true, tz, tzn, pg_sys::DateStyle, buf.as_mut_ptr(), ) } } #[unsafe(no_mangle)] // this is only going to be used to communicate with a rust lib we compile with this one #[allow(improper_ctypes_definitions)] pub extern "C" fn _ts_toolkit_decode_timestamptz(text: &str) -> i64 { use std::{ffi::CString, mem::MaybeUninit, ptr}; let str = CString::new(text).unwrap(); unsafe { let mut fsec = 0; let mut tt = MaybeUninit::zeroed().assume_init(); let tm = &mut tt; let 
mut tz = 0; let mut dtype = 0; let mut nf = 0; let mut field = [ptr::null_mut(); pg_sys::MAXDATEFIELDS as _]; let mut ftype = [0; pg_sys::MAXDATEFIELDS as _]; let mut workbuf = [0; pg_sys::MAXDATELEN as usize + pg_sys::MAXDATEFIELDS as usize]; let mut dterr = pg_sys::ParseDateTime( str.as_ptr(), workbuf.as_mut_ptr(), workbuf.len(), field.as_mut_ptr(), ftype.as_mut_ptr(), pg_sys::MAXDATEFIELDS as i32, &mut nf, ); #[cfg(feature = "pg15")] if dterr == 0 { dterr = pg_sys::DecodeDateTime( field.as_mut_ptr(), ftype.as_mut_ptr(), nf, &mut dtype, tm, &mut fsec, &mut tz, ) } #[cfg(not(feature = "pg15"))] if dterr == 0 { let mut extra = pgrx::pg_sys::DateTimeErrorExtra::default(); dterr = pg_sys::DecodeDateTime( field.as_mut_ptr(), ftype.as_mut_ptr(), nf, &mut dtype, tm, &mut fsec, &mut tz, &mut extra as *mut pgrx::pg_sys::DateTimeErrorExtra, ) } #[cfg(feature = "pg15")] if dterr != 0 { pg_sys::DateTimeParseError( dterr, str.as_ptr(), c"timestamptz".as_ptr().cast::(), ); } #[cfg(not(feature = "pg15"))] if dterr != 0 { pg_sys::DateTimeParseError( dterr, core::ptr::null_mut(), str.as_ptr(), c"timestamptz".as_ptr().cast::(), core::ptr::null_mut(), ); } match dtype as u32 { pg_sys::DTK_DATE => { let mut result = 0; let err = pg_sys::tm2timestamp(tm, fsec, &mut tz, &mut result); if err != 0 { // TODO pgrx error with correct errcode? 
panic!("timestamptz \"{text}\" out of range") } result } pg_sys::DTK_EPOCH => pg_sys::SetEpochTimestamp(), pg_sys::DTK_LATE => pg_sys::TimestampTz::MAX, pg_sys::DTK_EARLY => pg_sys::TimestampTz::MIN, _ => panic!("unexpected result {dtype} when parsing timestamptz \"{text}\""), } } } pub enum EncodedStr<'s> { Utf8(&'s str), Other(&'s CStr), } pub fn str_to_db_encoding(s: &str) -> EncodedStr<'_> { if unsafe { pg_sys::GetDatabaseEncoding() == pg_sys::pg_enc::PG_UTF8 as i32 } { return EncodedStr::Utf8(s); } let bytes = s.as_bytes(); let encoded = unsafe { pg_sys::pg_any_to_server( bytes.as_ptr() as *const c_char, bytes.len().try_into().unwrap(), pg_sys::pg_enc::PG_UTF8 as _, ) }; if std::ptr::eq(encoded, bytes.as_ptr() as *const c_char) { return EncodedStr::Utf8(s); } let cstr = unsafe { CStr::from_ptr(encoded) }; EncodedStr::Other(cstr) } pub fn str_from_db_encoding(s: &CStr) -> &str { if unsafe { pg_sys::GetDatabaseEncoding() == pg_sys::pg_enc::PG_UTF8 as i32 } { return s.to_str().unwrap(); } let str_len = s.to_bytes().len().try_into().unwrap(); let encoded = unsafe { pg_sys::pg_server_to_any(s.as_ptr(), str_len, pg_sys::pg_enc::PG_UTF8 as _) }; if std::ptr::eq(encoded, s.as_ptr()) { //TODO redundant check? return s.to_str().unwrap(); } unsafe { CStr::from_ptr(encoded).to_str().unwrap() } } pub(crate) mod serde_reference_adaptor { pub(crate) fn default_padding() -> [u8; 3] { [0; 3] } pub(crate) fn default_header() -> u32 { 0 } } ================================================ FILE: extension/src/stabilization_info.rs ================================================ // This file serves as the canonical database for what functionality Toolkit has // stabilized and in which version they were stabilized. The file is consumed by // by post-install as well as a number of tests that check if our stabilization // guarantees are being upheld. 
These different usages require different views // of the same info, so to avoid parsing issues the stabilization data is // exposed as macros that're left to the other files to interpret. // // XXX this file is used as multiple modules. Search for `#[path = "..."]` // directives before adding new macros to make sure that all relevant usages // can handle it. crate::functions_stabilized_at! { STABLE_FUNCTIONS "1.20.0" => { total(tdigest), total(uddsketch), } "1.16.0" => { approx_count_distinct(anyelement), approx_count_distinct_trans(internal,anyelement), accessornumgaps_in(cstring), accessornumgaps_out(accessornumgaps), accessornumliveranges_in(cstring), accessornumliveranges_out(accessornumliveranges), arrow_heartbeat_agg_num_gaps(heartbeatagg,accessornumgaps), arrow_heartbeat_agg_num_live_ranges(heartbeatagg,accessornumliveranges), arrow_heartbeat_agg_trim_to(heartbeatagg,heartbeattrimtoaccessor), heartbeattrimtoaccessor_in(cstring), heartbeattrimtoaccessor_out(heartbeattrimtoaccessor), num_gaps(), num_gaps(heartbeatagg), num_live_ranges(), num_live_ranges(heartbeatagg), trim_to(heartbeatagg,timestamp with time zone,interval), trim_to(timestamp with time zone,interval), accessorpercentilearray_in(cstring), accessorpercentilearray_out(accessorpercentilearray), arrow_uddsketch_approx_percentile_array(uddsketch,accessorpercentilearray), days_in_month(timestamp with time zone), month_normalize(double precision,timestamp with time zone,double precision), to_epoch(timestamp with time zone), accessorintoarray_in(cstring), accessorintoarray_out(accessorintoarray), arrow_max_float_into_array(maxfloats,accessorintoarray), arrow_max_float_into_values(maxfloats,accessorintovalues), into_array(), into_array(maxfloats), into_array(maxints), into_array(maxtimes), into_array(minfloats), into_array(minints), into_array(mintimes), into_values(maxbyfloats,anyelement), into_values(maxbyints,anyelement), into_values(maxbytimes,anyelement), into_values(maxfloats), 
into_values(maxints), into_values(maxtimes), into_values(minbyfloats,anyelement), into_values(minbyints,anyelement), into_values(minbytimes,anyelement), into_values(minfloats), into_values(minints), into_values(mintimes), max_n(bigint,bigint), max_n(double precision,bigint), max_n(timestamp with time zone,bigint), max_n_by(bigint,anyelement,bigint), max_n_by(double precision,anyelement,bigint), max_n_by(timestamp with time zone,anyelement,bigint), max_n_by_float_final(internal), max_n_by_float_rollup_trans(internal,maxbyfloats), max_n_by_float_trans(internal,double precision,anyelement,bigint), max_n_by_int_final(internal), max_n_by_int_rollup_trans(internal,maxbyints), max_n_by_int_trans(internal,bigint,anyelement,bigint), max_n_by_time_final(internal), max_n_by_time_rollup_trans(internal,maxbytimes), max_n_by_time_trans(internal,timestamp with time zone,anyelement,bigint), max_n_float_combine(internal,internal), max_n_float_deserialize(bytea,internal), max_n_float_final(internal), max_n_float_rollup_trans(internal,maxfloats), max_n_float_serialize(internal), max_n_float_trans(internal,double precision,bigint), max_n_int_combine(internal,internal), max_n_int_deserialize(bytea,internal), max_n_int_final(internal), max_n_int_rollup_trans(internal,maxints), max_n_int_serialize(internal), max_n_int_trans(internal,bigint,bigint), max_n_time_combine(internal,internal), max_n_time_deserialize(bytea,internal), max_n_time_final(internal), max_n_time_rollup_trans(internal,maxtimes), max_n_time_serialize(internal), max_n_time_trans(internal,timestamp with time zone,bigint), maxbyfloats_in(cstring), maxbyfloats_out(maxbyfloats), maxbyints_in(cstring), maxbyints_out(maxbyints), maxbytimes_in(cstring), maxbytimes_out(maxbytimes), maxfloats_in(cstring), maxfloats_out(maxfloats), maxints_in(cstring), maxints_out(maxints), maxtimes_in(cstring), maxtimes_out(maxtimes), min_n(bigint,bigint), min_n(double precision,bigint), min_n(timestamp with time zone,bigint), 
min_n_by(bigint,anyelement,bigint), min_n_by(double precision,anyelement,bigint), min_n_by(timestamp with time zone,anyelement,bigint), min_n_by_float_final(internal), min_n_by_float_rollup_trans(internal,minbyfloats), min_n_by_float_trans(internal,double precision,anyelement,bigint), min_n_by_int_final(internal), min_n_by_int_rollup_trans(internal,minbyints), min_n_by_int_trans(internal,bigint,anyelement,bigint), min_n_by_time_final(internal), min_n_by_time_rollup_trans(internal,minbytimes), min_n_by_time_trans(internal,timestamp with time zone,anyelement,bigint), min_n_float_combine(internal,internal), min_n_float_deserialize(bytea,internal), min_n_float_final(internal), min_n_float_rollup_trans(internal,minfloats), min_n_float_serialize(internal), min_n_float_trans(internal,double precision,bigint), min_n_int_combine(internal,internal), min_n_int_deserialize(bytea,internal), min_n_int_final(internal), min_n_int_rollup_trans(internal,minints), min_n_int_serialize(internal), min_n_int_trans(internal,bigint,bigint), min_n_time_combine(internal,internal), min_n_time_deserialize(bytea,internal), min_n_time_final(internal), min_n_time_rollup_trans(internal,mintimes), min_n_time_serialize(internal), min_n_time_trans(internal,timestamp with time zone,bigint), minbyfloats_in(cstring), minbyfloats_out(minbyfloats), minbyints_in(cstring), minbyints_out(minbyints), minbytimes_in(cstring), minbytimes_out(minbytimes), minfloats_in(cstring), minfloats_out(minfloats), minints_in(cstring), minints_out(minints), mintimes_in(cstring), mintimes_out(mintimes), rollup(maxbyfloats), rollup(maxbyints), rollup(maxbytimes), rollup(maxfloats), rollup(maxints), rollup(maxtimes), rollup(minbyfloats), rollup(minbyints), rollup(minbytimes), rollup(minfloats), rollup(minints), rollup(mintimes), arrow_max_int_into_array(maxints,accessorintoarray), arrow_max_int_into_values(maxints,accessorintovalues), arrow_max_time_into_array(maxtimes,accessorintoarray), 
arrow_max_time_into_values(maxtimes,accessorintovalues), arrow_min_float_into_array(minfloats,accessorintoarray), arrow_min_float_into_values(minfloats,accessorintovalues), arrow_min_int_into_array(minints,accessorintoarray), arrow_min_int_into_values(minints,accessorintovalues), arrow_min_time_into_array(mintimes,accessorintoarray), arrow_min_time_into_values(mintimes,accessorintovalues), accessormaxfrequencyint_in(cstring), accessormaxfrequencyint_out(accessormaxfrequencyint), accessorminfrequencyint_in(cstring), accessorminfrequencyint_out(accessorminfrequencyint), accessortopn_in(cstring), accessortopn_out(accessortopn), accessortopncount_in(cstring), accessortopncount_out(accessortopncount), arrow_default_topn_bigint(spacesavingbigintaggregate,accessortopn), arrow_default_topn_text(spacesavingtextaggregate,accessortopn), arrow_freq_bigint_iter(spacesavingbigintaggregate,accessorintovalues), arrow_freq_text_iter(spacesavingtextaggregate,accessorintovalues), arrow_max_bigint_frequency(spacesavingbigintaggregate,accessormaxfrequencyint), arrow_min_bigint_frequency(spacesavingbigintaggregate,accessorminfrequencyint), arrow_topn_bigint(spacesavingbigintaggregate,accessortopncount), arrow_topn_text(spacesavingtextaggregate,accessortopncount), into_values(spacesavingaggregate,anyelement), into_values(spacesavingbigintaggregate), into_values(spacesavingtextaggregate), max_frequency(bigint), max_frequency(spacesavingaggregate,anyelement), max_frequency(spacesavingbigintaggregate,bigint), max_frequency(spacesavingtextaggregate,text), mcv_agg(integer,bigint), mcv_agg(integer,double precision,bigint), mcv_agg(integer,double precision,text), mcv_agg(integer,text), mcv_agg_bigint_trans(internal,integer,bigint), mcv_agg_text_trans(internal,integer,text), mcv_agg_trans(internal,integer,anyelement), mcv_agg_with_skew_bigint_trans(internal,integer,double precision,bigint), mcv_agg_with_skew_text_trans(internal,integer,double precision,text), 
mcv_agg_with_skew_trans(internal,integer,double precision,anyelement), min_frequency(bigint), min_frequency(spacesavingaggregate,anyelement), min_frequency(spacesavingbigintaggregate,bigint), min_frequency(spacesavingtextaggregate,text), raw_mcv_agg(integer,anyelement), raw_mcv_agg(integer,double precision,anyelement), rollup(spacesavingaggregate), rollup(spacesavingbigintaggregate), rollup(spacesavingtextaggregate), rollup_agg_bigint_trans(internal,spacesavingbigintaggregate), rollup_agg_text_trans(internal,spacesavingtextaggregate), rollup_agg_trans(internal,spacesavingaggregate), space_saving_bigint_final(internal), space_saving_combine(internal,internal), space_saving_deserialize(bytea,internal), space_saving_final(internal), space_saving_serialize(internal), space_saving_text_final(internal), spacesavingaggregate_in(cstring), spacesavingaggregate_out(spacesavingaggregate), spacesavingbigintaggregate_in(cstring), spacesavingbigintaggregate_out(spacesavingbigintaggregate), spacesavingtextaggregate_in(cstring), spacesavingtextaggregate_out(spacesavingtextaggregate), topn(), topn(bigint), topn(spacesavingaggregate,anyelement), topn(spacesavingaggregate,integer,anyelement), topn(spacesavingbigintaggregate), topn(spacesavingbigintaggregate,integer), topn(spacesavingtextaggregate), topn(spacesavingtextaggregate,integer), } "1.15.0" => { arrow_counter_interpolated_delta(countersummary,counterinterpolateddeltaaccessor), arrow_counter_interpolated_rate(countersummary,counterinterpolatedrateaccessor), arrow_time_weighted_average_interpolated_average(timeweightsummary,timeweightinterpolatedaverageaccessor), counterinterpolateddeltaaccessor_in(cstring), counterinterpolateddeltaaccessor_out(counterinterpolateddeltaaccessor), counterinterpolatedrateaccessor_in(cstring), counterinterpolatedrateaccessor_out(counterinterpolatedrateaccessor), interpolated_average(timestamp with time zone,interval,timeweightsummary,timeweightsummary), interpolated_delta(timestamp with time 
zone,interval,countersummary,countersummary), interpolated_rate(timestamp with time zone,interval,countersummary,countersummary), timeweightinterpolatedaverageaccessor_in(cstring), timeweightinterpolatedaverageaccessor_out(timeweightinterpolatedaverageaccessor), accessorintegral_in(cstring), accessorintegral_out(accessorintegral), arrow_time_weighted_average_integral(timeweightsummary,accessorintegral), arrow_time_weighted_average_interpolated_integral(timeweightsummary,timeweightinterpolatedintegralaccessor), integral(text), integral(timeweightsummary,text), interpolated_integral(timestamp with time zone,interval,timeweightsummary,timeweightsummary,text), interpolated_integral(timeweightsummary,timestamp with time zone, interval,timeweightsummary,timeweightsummary,text), timeweightinterpolatedintegralaccessor_in(cstring), timeweightinterpolatedintegralaccessor_out(timeweightinterpolatedintegralaccessor), dead_ranges(heartbeatagg), downtime(heartbeatagg), heartbeat_agg(timestamp with time zone,timestamp with time zone,interval,interval), heartbeat_final(internal), heartbeat_rollup_trans(internal,heartbeatagg), heartbeat_trans(internal,timestamp with time zone,timestamp with time zone,interval,interval), heartbeatagg_in(cstring), heartbeatagg_out(heartbeatagg), interpolate(heartbeatagg,heartbeatagg), interpolated_downtime(heartbeatagg,heartbeatagg), interpolated_uptime(heartbeatagg,heartbeatagg), live_at(heartbeatagg,timestamp with time zone), live_ranges(heartbeatagg), rollup(heartbeatagg), uptime(heartbeatagg), accessordeadranges_in(cstring), accessordeadranges_out(accessordeadranges), accessordowntime_in(cstring), accessordowntime_out(accessordowntime), accessorliveat_in(cstring), accessorliveat_out(accessorliveat), accessorliveranges_in(cstring), accessorliveranges_out(accessorliveranges), accessoruptime_in(cstring), accessoruptime_out(accessoruptime), arrow_heartbeat_agg_dead_ranges(heartbeatagg,accessordeadranges), 
arrow_heartbeat_agg_downtime(heartbeatagg,accessordowntime), arrow_heartbeat_agg_live_at(heartbeatagg,accessorliveat), arrow_heartbeat_agg_live_ranges(heartbeatagg,accessorliveranges), arrow_heartbeat_agg_uptime(heartbeatagg,accessoruptime), dead_ranges(), downtime(), live_at(timestamp with time zone), live_ranges(), uptime(), arrow_heartbeat_agg_interpolate(heartbeatagg,heartbeatinterpolateaccessor), arrow_heartbeat_agg_interpolated_downtime(heartbeatagg,heartbeatinterpolateddowntimeaccessor), arrow_heartbeat_agg_interpolated_uptime(heartbeatagg,heartbeatinterpolateduptimeaccessor), heartbeatinterpolateaccessor_in(cstring), heartbeatinterpolateaccessor_out(heartbeatinterpolateaccessor), heartbeatinterpolateddowntimeaccessor_in(cstring), heartbeatinterpolateddowntimeaccessor_out(heartbeatinterpolateddowntimeaccessor), heartbeatinterpolateduptimeaccessor_in(cstring), heartbeatinterpolateduptimeaccessor_out(heartbeatinterpolateduptimeaccessor), interpolate(heartbeatagg), interpolated_downtime(heartbeatagg), interpolated_uptime(heartbeatagg), duration_in(stateagg,bigint), duration_in(stateagg,bigint,timestamp with time zone,interval), duration_in(stateagg,text), duration_in(stateagg,text,timestamp with time zone,interval), interpolated_duration_in(stateagg,bigint,timestamp with time zone,interval,stateagg), interpolated_duration_in(stateagg,text,timestamp with time zone,interval,stateagg), interpolated_state_periods(stateagg,bigint,timestamp with time zone,interval,stateagg), interpolated_state_periods(stateagg,text,timestamp with time zone,interval,stateagg), interpolated_state_timeline(stateagg,timestamp with time zone,interval,stateagg), interpolated_state_int_timeline(stateagg,timestamp with time zone,interval,stateagg), into_int_values(stateagg), into_values(stateagg), rollup(stateagg), state_agg(timestamp with time zone,bigint), state_agg(timestamp with time zone,text), state_agg_combine_fn_outer(internal,internal), 
state_agg_deserialize_fn_outer(bytea,internal), state_agg_finally_fn_outer(internal), state_agg_int_trans(internal,timestamp with time zone,bigint), state_agg_rollup_final(internal), state_agg_rollup_trans(internal,stateagg), state_agg_serialize_fn_outer(internal), state_agg_transition_fn_outer(internal,timestamp with time zone,text), state_at(stateagg,timestamp with time zone), state_at_int(stateagg,timestamp with time zone), state_int_timeline(stateagg), state_periods(stateagg,bigint), state_periods(stateagg,text), state_timeline(stateagg), stateagg_in(cstring), stateagg_out(stateagg), state_agg_rollup_combine(internal,internal), state_agg_rollup_deserialize(bytea,internal), state_agg_rollup_serialize(internal), accessordurationin_in(cstring), accessordurationin_out(accessordurationin), accessordurationinint_in(cstring), accessordurationinint_out(accessordurationinint), accessordurationinrange_in(cstring), accessordurationinrange_out(accessordurationinrange), accessordurationinrangeint_in(cstring), accessordurationinrangeint_out(accessordurationinrangeint), accessorinterpolateddurationin_in(cstring), accessorinterpolateddurationin_out(accessorinterpolateddurationin), accessorinterpolateddurationinint_in(cstring), accessorinterpolateddurationinint_out(accessorinterpolateddurationinint), accessorinterpolatedstateinttimeline_in(cstring), accessorinterpolatedstateinttimeline_out(accessorinterpolatedstateinttimeline), accessorinterpolatedstateperiods_in(cstring), accessorinterpolatedstateperiods_out(accessorinterpolatedstateperiods), accessorinterpolatedstateperiodsint_in(cstring), accessorinterpolatedstateperiodsint_out(accessorinterpolatedstateperiodsint), accessorinterpolatedstatetimeline_in(cstring), accessorinterpolatedstatetimeline_out(accessorinterpolatedstatetimeline), accessorintointvalues_in(cstring), accessorintointvalues_out(accessorintointvalues), accessorintovalues_in(cstring), accessorintovalues_out(accessorintovalues), accessorstateat_in(cstring), 
accessorstateat_out(accessorstateat), accessorstateatint_in(cstring), accessorstateatint_out(accessorstateatint), accessorstateinttimeline_in(cstring), accessorstateinttimeline_out(accessorstateinttimeline), accessorstateperiods_in(cstring), accessorstateperiods_out(accessorstateperiods), accessorstateperiodsint_in(cstring), accessorstateperiodsint_out(accessorstateperiodsint), accessorstatetimeline_in(cstring), accessorstatetimeline_out(accessorstatetimeline), arrow_state_agg_duration_in_int(stateagg,accessordurationinint), arrow_state_agg_duration_in_range_int(stateagg,accessordurationinrangeint), arrow_state_agg_duration_in_range_string(stateagg,accessordurationinrange), arrow_state_agg_duration_in_string(stateagg,accessordurationin), arrow_state_agg_interpolated_duration_in_int(stateagg,accessorinterpolateddurationinint), arrow_state_agg_interpolated_duration_in_string(stateagg,accessorinterpolateddurationin), arrow_state_agg_interpolated_state_int_timeline(stateagg,accessorinterpolatedstateinttimeline), arrow_state_agg_interpolated_state_periods_int(stateagg,accessorinterpolatedstateperiodsint), arrow_state_agg_interpolated_state_periods_string(stateagg,accessorinterpolatedstateperiods), arrow_state_agg_interpolated_state_timeline(stateagg,accessorinterpolatedstatetimeline), arrow_state_agg_into_int_values(stateagg,accessorintointvalues), arrow_state_agg_into_values(stateagg,accessorintovalues), arrow_state_agg_state_at_int(stateagg,accessorstateatint), arrow_state_agg_state_at_string(stateagg,accessorstateat), arrow_state_agg_state_int_timeline(stateagg,accessorstateinttimeline), arrow_state_agg_state_periods_int(stateagg,accessorstateperiodsint), arrow_state_agg_state_periods_string(stateagg,accessorstateperiods), arrow_state_agg_state_timeline(stateagg,accessorstatetimeline), duration_in(bigint), duration_in(bigint,timestamp with time zone,interval), duration_in(text), duration_in(text,timestamp with time zone,interval), 
interpolated_duration_in(bigint,timestamp with time zone,interval,stateagg), interpolated_duration_in(text,timestamp with time zone,interval,stateagg), interpolated_state_int_timeline(timestamp with time zone,interval,stateagg), interpolated_state_periods(bigint,timestamp with time zone,interval,stateagg), interpolated_state_periods(text,timestamp with time zone,interval,stateagg), interpolated_state_timeline(timestamp with time zone,interval,stateagg), into_int_values(), into_values(), state_at(timestamp with time zone), state_at_int(timestamp with time zone), state_int_timeline(), state_periods(bigint), state_periods(text), state_timeline(), } "1.14.0" => { interpolated_average(timeweightsummary,timestamp with time zone,interval,timeweightsummary,timeweightsummary), interpolated_delta(countersummary,timestamp with time zone,interval,countersummary,countersummary), interpolated_rate(countersummary,timestamp with time zone,interval,countersummary,countersummary), accessorclose_in(cstring), accessorclose_out(accessorclose), accessorclosetime_in(cstring), accessorclosetime_out(accessorclosetime), accessorhigh_in(cstring), accessorhigh_out(accessorhigh), accessorhightime_in(cstring), accessorhightime_out(accessorhightime), accessorlow_in(cstring), accessorlow_out(accessorlow), accessorlowtime_in(cstring), accessorlowtime_out(accessorlowtime), accessoropen_in(cstring), accessoropen_out(accessoropen), accessoropentime_in(cstring), accessoropentime_out(accessoropentime), arrow_close(candlestick,accessorclose), arrow_close_time(candlestick,accessorclosetime), arrow_high(candlestick,accessorhigh), arrow_high_time(candlestick,accessorhightime), arrow_low(candlestick,accessorlow), arrow_low_time(candlestick,accessorlowtime), arrow_open(candlestick,accessoropen), arrow_open_time(candlestick,accessoropentime), candlestick(timestamp with time zone, double precision,double precision, double precision, double precision, double precision), candlestick_agg(timestamp with time 
zone,double precision,double precision), candlestick_combine(internal,internal), candlestick_deserialize(bytea,internal), candlestick_final(internal), candlestick_in(cstring), candlestick_out(candlestick), candlestick_rollup_trans(internal,candlestick), candlestick_serialize(internal), open(), open_time(), close(candlestick), close(), close_time(candlestick), close_time(), high(candlestick), high(), high_time(candlestick), high_time(), low(candlestick), low(), low_time(candlestick), low_time(), open(candlestick), open_time(candlestick), rollup(candlestick), tick_data_no_vol_transition(internal,timestamp with time zone,double precision), tick_data_transition(internal,timestamp with time zone,double precision,double precision), volume(candlestick), vwap(candlestick), } "1.12.0" => { stats1d_tf_inv_trans(internal,double precision), stats1d_tf_final(internal), stats1d_tf_trans(internal,double precision), stats2d_tf_final(internal), stats2d_tf_trans(internal,double precision,double precision), stats2d_tf_inv_trans(internal,double precision,double precision), } "1.11.0" => { accessorfirsttime_in(cstring), accessorfirsttime_out(accessorfirsttime), accessorfirstval_in(cstring), accessorfirstval_out(accessorfirstval), accessorlasttime_in(cstring), accessorlasttime_out(accessorlasttime), accessorlastval_in(cstring), accessorlastval_out(accessorlastval), arrow_counter_agg_first_time(countersummary,accessorfirsttime), arrow_counter_agg_first_val(countersummary,accessorfirstval), arrow_counter_agg_last_time(countersummary,accessorlasttime), arrow_counter_agg_last_val(countersummary,accessorlastval), arrow_time_weight_first_time(timeweightsummary,accessorfirsttime), arrow_time_weight_first_val(timeweightsummary,accessorfirstval), arrow_time_weight_last_time(timeweightsummary,accessorlasttime), arrow_time_weight_last_val(timeweightsummary,accessorlastval), first_time(), first_time(countersummary), first_time(timeweightsummary), first_val(), first_val(countersummary), 
first_val(timeweightsummary), last_time(), last_time(countersummary), last_time(timeweightsummary), last_val(), last_val(countersummary), last_val(timeweightsummary), asap_final(internal), asap_smooth(timestamp with time zone,double precision,integer), asap_smooth(timevector_tstz_f64,integer), asap_trans(internal,timestamp with time zone,double precision,integer), } "1.9.0" => { accessorapproxpercentile_in(cstring), accessorapproxpercentile_out(accessorapproxpercentile), accessorapproxpercentilerank_in(cstring), accessorapproxpercentilerank_out(accessorapproxpercentilerank), accessoraverage_in(cstring), accessoraverage_out(accessoraverage), accessoraveragex_in(cstring), accessoraveragex_out(accessoraveragex), accessoraveragey_in(cstring), accessoraveragey_out(accessoraveragey), accessorcorr_in(cstring), accessorcorr_out(accessorcorr), accessorcounterzerotime_in(cstring), accessorcounterzerotime_out(accessorcounterzerotime), accessorcovar_in(cstring), accessorcovar_out(accessorcovar), accessordelta_in(cstring), accessordelta_out(accessordelta), accessordeterminationcoeff_in(cstring), accessordeterminationcoeff_out(accessordeterminationcoeff), accessordistinctcount_in(cstring), accessordistinctcount_out(accessordistinctcount), accessorerror_in(cstring), accessorerror_out(accessorerror), accessorextrapolateddelta_in(cstring), accessorextrapolateddelta_out(accessorextrapolateddelta), accessorextrapolatedrate_in(cstring), accessorextrapolatedrate_out(accessorextrapolatedrate), accessorideltaleft_in(cstring), accessorideltaleft_out(accessorideltaleft), accessorideltaright_in(cstring), accessorideltaright_out(accessorideltaright), accessorintercept_in(cstring), accessorintercept_out(accessorintercept), accessorirateleft_in(cstring), accessorirateleft_out(accessorirateleft), accessorirateright_in(cstring), accessorirateright_out(accessorirateright), accessorkurtosis_in(cstring), accessorkurtosis_out(accessorkurtosis), accessorkurtosisx_in(cstring), 
accessorkurtosisx_out(accessorkurtosisx), accessorkurtosisy_in(cstring), accessorkurtosisy_out(accessorkurtosisy), accessormaxval_in(cstring), accessormaxval_out(accessormaxval), accessormean_in(cstring), accessormean_out(accessormean), accessorminval_in(cstring), accessorminval_out(accessorminval), accessornumchanges_in(cstring), accessornumchanges_out(accessornumchanges), accessornumelements_in(cstring), accessornumelements_out(accessornumelements), accessornumresets_in(cstring), accessornumresets_out(accessornumresets), accessornumvals_in(cstring), accessornumvals_out(accessornumvals), accessorrate_in(cstring), accessorrate_out(accessorrate), accessorskewness_in(cstring), accessorskewness_out(accessorskewness), accessorskewnessx_in(cstring), accessorskewnessx_out(accessorskewnessx), accessorskewnessy_in(cstring), accessorskewnessy_out(accessorskewnessy), accessorslope_in(cstring), accessorslope_out(accessorslope), accessorstddev_in(cstring), accessorstddev_out(accessorstddev), accessorstddevx_in(cstring), accessorstddevx_out(accessorstddevx), accessorstddevy_in(cstring), accessorstddevy_out(accessorstddevy), accessorstderror_in(cstring), accessorstderror_out(accessorstderror), accessorsum_in(cstring), accessorsum_out(accessorsum), accessorsumx_in(cstring), accessorsumx_out(accessorsumx), accessorsumy_in(cstring), accessorsumy_out(accessorsumy), accessortimedelta_in(cstring), accessortimedelta_out(accessortimedelta), accessorunnest_in(cstring), accessorunnest_out(accessorunnest), accessorvariance_in(cstring), accessorvariance_out(accessorvariance), accessorvariancex_in(cstring), accessorvariancex_out(accessorvariancex), accessorvariancey_in(cstring), accessorvariancey_out(accessorvariancey), accessorwithbounds_in(cstring), accessorwithbounds_out(accessorwithbounds), accessorxintercept_in(cstring), accessorxintercept_out(accessorxintercept), approx_percentile(double precision), approx_percentile_rank(double precision), 
arrow_counter_agg_corr(countersummary,accessorcorr), arrow_counter_agg_delta(countersummary,accessordelta), arrow_counter_agg_extrapolated_delta(countersummary,accessorextrapolateddelta), arrow_counter_agg_extrapolated_rate(countersummary,accessorextrapolatedrate), arrow_counter_agg_idelta_left(countersummary,accessorideltaleft), arrow_counter_agg_idelta_right(countersummary,accessorideltaright), arrow_counter_agg_intercept(countersummary,accessorintercept), arrow_counter_agg_irate_left(countersummary,accessorirateleft), arrow_counter_agg_irate_right(countersummary,accessorirateright), arrow_counter_agg_num_changes(countersummary,accessornumchanges), arrow_counter_agg_num_elements(countersummary,accessornumelements), arrow_counter_agg_num_resets(countersummary,accessornumresets), arrow_counter_agg_rate(countersummary,accessorrate), arrow_counter_agg_slope(countersummary,accessorslope), arrow_counter_agg_time_delta(countersummary,accessortimedelta), arrow_counter_agg_with_bounds(countersummary,accessorwithbounds), arrow_counter_agg_zero_time(countersummary,accessorcounterzerotime), arrow_hyperloglog_count(hyperloglog,accessordistinctcount), arrow_hyperloglog_error(hyperloglog,accessorstderror), arrow_stats1d_average(statssummary1d,accessoraverage), arrow_stats1d_kurtosis(statssummary1d,accessorkurtosis), arrow_stats1d_num_vals(statssummary1d,accessornumvals), arrow_stats1d_skewness(statssummary1d,accessorskewness), arrow_stats1d_stddev(statssummary1d,accessorstddev), arrow_stats1d_sum(statssummary1d,accessorsum), arrow_stats1d_variance(statssummary1d,accessorvariance), arrow_stats2d_average_x(statssummary2d,accessoraveragex), arrow_stats2d_average_y(statssummary2d,accessoraveragey), arrow_stats2d_corr(statssummary2d,accessorcorr), arrow_stats2d_covar(statssummary2d,accessorcovar), arrow_stats2d_determination_coeff(statssummary2d,accessordeterminationcoeff), arrow_stats2d_intercept(statssummary2d,accessorintercept), 
arrow_stats2d_kurtosis_x(statssummary2d,accessorkurtosisx), arrow_stats2d_kurtosis_y(statssummary2d,accessorkurtosisy), arrow_stats2d_num_vals(statssummary2d,accessornumvals), arrow_stats2d_skewness_x(statssummary2d,accessorskewnessx), arrow_stats2d_skewness_y(statssummary2d,accessorskewnessy), arrow_stats2d_slope(statssummary2d,accessorslope), arrow_stats2d_stdddev_x(statssummary2d,accessorstddevx), arrow_stats2d_stdddev_y(statssummary2d,accessorstddevy), arrow_stats2d_sum_x(statssummary2d,accessorsumx), arrow_stats2d_sum_y(statssummary2d,accessorsumy), arrow_stats2d_variance_x(statssummary2d,accessorvariancex), arrow_stats2d_variance_y(statssummary2d,accessorvariancey), arrow_stats2d_x_intercept(statssummary2d,accessorxintercept), arrow_tdigest_approx_percentile(tdigest,accessorapproxpercentile), arrow_tdigest_approx_rank(tdigest,accessorapproxpercentilerank), arrow_tdigest_max(tdigest,accessormaxval), arrow_tdigest_mean(tdigest,accessormean), arrow_tdigest_min(tdigest,accessorminval), arrow_tdigest_num_vals(tdigest,accessornumvals), arrow_time_weighted_average_average(timeweightsummary,accessoraverage), arrow_timevector_unnest(timevector_tstz_f64,accessorunnest), arrow_uddsketch_approx_percentile(uddsketch,accessorapproxpercentile), arrow_uddsketch_approx_rank(uddsketch,accessorapproxpercentilerank), arrow_uddsketch_error(uddsketch,accessorerror), arrow_uddsketch_mean(uddsketch,accessormean), arrow_uddsketch_num_vals(uddsketch,accessornumvals), average(), average_x(), average_y(), corr(), counter_zero_time(), covariance(text), delta(), determination_coeff(), distinct_count(), error(), extrapolated_delta(text), extrapolated_rate(text), idelta_left(), idelta_right(), intercept(), irate_left(), irate_right(), kurtosis(text), kurtosis_x(text), kurtosis_y(text), lttb(timestamp with time zone,double precision,integer), lttb(timevector_tstz_f64,integer), lttb_final(internal), lttb_trans(internal,timestamp with time zone,double precision,integer), max_val(), mean(), 
min_val(), num_changes(), num_elements(), num_resets(), num_vals(), rate(), rollup(timevector_tstz_f64), skewness(text), skewness_x(text), skewness_y(text), slope(), stddev(text), stddev_x(text), stddev_y(text), stderror(), sum(), sum_x(), sum_y(), time_delta(), timevector(timestamp with time zone,double precision), timevector_combine(internal,internal), timevector_deserialize(bytea,internal), timevector_final(internal), timevector_serialize(internal), timevector_tstz_f64_compound_trans(internal,timevector_tstz_f64), timevector_tstz_f64_in(cstring), timevector_tstz_f64_out(timevector_tstz_f64), timevector_tstz_f64_trans(internal,timestamp with time zone,double precision), unnest(), unnest(timevector_tstz_f64), variance(text), variance_x(text), variance_y(text), with_bounds(tstzrange), x_intercept(), lttb(timestamp with time zone,double precision,integer), lttb(timevector_tstz_f64,integer), lttb_final(internal), lttb_trans(internal,timestamp with time zone,double precision,integer), } "1.8.0" => { } "1.7.0" => { } "1.6.0" => { } "1.5" => { } "prehistory" => { approx_percentile(double precision,uddsketch), approx_percentile_rank(double precision,uddsketch), error(uddsketch), mean(uddsketch), num_vals(uddsketch), percentile_agg(double precision), percentile_agg_trans(internal,double precision), uddsketch(integer,double precision,double precision), rollup(uddsketch), uddsketch_combine(internal,internal), uddsketch_compound_trans(internal,uddsketch), uddsketch_deserialize(bytea,internal), uddsketch_final(internal), uddsketch_in(cstring), uddsketch_out(uddsketch), uddsketch_serialize(internal), uddsketch_trans(internal,integer,double precision,double precision), approx_percentile(double precision,tdigest), approx_percentile_rank(double precision,tdigest), max_val(tdigest), min_val(tdigest), mean(tdigest), num_vals(tdigest), tdigest(integer,double precision), rollup(tdigest), tdigest_combine(internal,internal), tdigest_compound_combine(internal,internal), 
tdigest_compound_deserialize(bytea,internal), tdigest_compound_final(internal), tdigest_compound_serialize(internal), tdigest_compound_trans(internal,tdigest), tdigest_deserialize(bytea,internal), tdigest_final(internal), tdigest_in(cstring), tdigest_out(tdigest), tdigest_serialize(internal), tdigest_trans(internal,integer,double precision), average(timeweightsummary), time_weight(text,timestamp with time zone,double precision), rollup(timeweightsummary), time_weight_combine(internal,internal), time_weight_final(internal), time_weight_summary_trans(internal,timeweightsummary), time_weight_trans(internal,text,timestamp with time zone,double precision), time_weight_trans_deserialize(bytea,internal), time_weight_trans_serialize(internal), timeweightsummary_in(cstring), timeweightsummary_out(timeweightsummary), corr(countersummary), counter_agg(timestamp with time zone,double precision), counter_agg(timestamp with time zone,double precision,tstzrange), counter_agg_combine(internal,internal), counter_agg_final(internal), counter_agg_summary_trans(internal,countersummary), counter_agg_trans(internal,timestamp with time zone,double precision,tstzrange), counter_agg_trans_no_bounds(internal,timestamp with time zone,double precision), counter_summary_trans_deserialize(bytea,internal), counter_summary_trans_serialize(internal), counter_zero_time(countersummary), countersummary_in(cstring), countersummary_out(countersummary), delta(countersummary), extrapolated_delta(countersummary,text), extrapolated_rate(countersummary,text), idelta_left(countersummary), idelta_right(countersummary), intercept(countersummary), irate_left(countersummary), irate_right(countersummary), num_changes(countersummary), num_elements(countersummary), num_resets(countersummary), rate(countersummary), rollup(countersummary), slope(countersummary), time_delta(countersummary), with_bounds(countersummary,tstzrange), hyperloglog(integer,anyelement), hyperloglog_combine(internal,internal), 
hyperloglog_deserialize(bytea,internal), hyperloglog_final(internal), hyperloglog_in(cstring), hyperloglog_out(hyperloglog), hyperloglog_serialize(internal), hyperloglog_trans(internal,integer,anyelement), hyperloglog_union(internal,hyperloglog), rollup(hyperloglog), stderror(hyperloglog), average(statssummary1d), average_x(statssummary2d), average_y(statssummary2d), corr(statssummary2d), covariance(statssummary2d,text), determination_coeff(statssummary2d), intercept(statssummary2d), kurtosis(statssummary1d,text), kurtosis_x(statssummary2d,text), kurtosis_y(statssummary2d,text), num_vals(statssummary1d), num_vals(statssummary2d), rolling(statssummary1d), rolling(statssummary2d), rollup(statssummary1d), rollup(statssummary2d), skewness(statssummary1d,text), skewness_x(statssummary2d,text), skewness_y(statssummary2d,text), slope(statssummary2d), stats1d_combine(internal,internal), stats1d_final(internal), stats1d_inv_trans(internal,double precision), stats1d_summary_inv_trans(internal,statssummary1d), stats1d_summary_trans(internal,statssummary1d), stats1d_trans(internal,double precision), stats1d_trans_deserialize(bytea,internal), stats1d_trans_serialize(internal), stats2d_combine(internal,internal), stats2d_final(internal), stats2d_inv_trans(internal,double precision,double precision), stats2d_summary_inv_trans(internal,statssummary2d), stats2d_summary_trans(internal,statssummary2d), stats2d_trans(internal,double precision,double precision), stats2d_trans_deserialize(bytea,internal), stats2d_trans_serialize(internal), stats_agg(double precision), stats_agg(double precision,double precision), stats_agg_no_inv(double precision), stats_agg_no_inv(double precision,double precision), statssummary1d_in(cstring), statssummary1d_out(statssummary1d), statssummary2d_in(cstring), statssummary2d_out(statssummary2d), stddev(statssummary1d,text), stddev_x(statssummary2d,text), stddev_y(statssummary2d,text), sum(statssummary1d), sum_x(statssummary2d), sum_y(statssummary2d), 
// NOTE(review): the line below begins with the tail of the preceding
// `…_stabilized_at!` invocation (the "prehistory" function list) and then opens
// `crate::types_stabilized_at!`, which records — per toolkit release — the custom
// SQL types that became stable in that release. These entries appear to be
// append-only historical records keyed by release version (releases with no new
// stable types keep an empty section, e.g. "1.8.0" => { }); presumably they must
// never be renamed or removed once a release has shipped — confirm against the
// macro's definition before editing. Comments here are safe: non-doc `//`
// comments are not tokens and are invisible to the macro.
variance(statssummary1d,text), variance_x(statssummary2d,text), variance_y(statssummary2d,text), x_intercept(statssummary2d), distinct_count(hyperloglog), } } crate::types_stabilized_at! { STABLE_TYPES "1.16.0" => { accessornumgaps, accessornumliveranges, heartbeattrimtoaccessor, accessorpercentilearray, maxbyfloats, maxbyints, maxbytimes, maxfloats, maxints, maxtimes, minbyfloats, minbyints, minbytimes, minfloats, minints, mintimes, accessorintoarray, accessormaxfrequencyint, accessorminfrequencyint, accessortopn, accessortopncount, spacesavingaggregate, spacesavingbigintaggregate, spacesavingtextaggregate, } "1.15.0" => { counterinterpolateddeltaaccessor, counterinterpolatedrateaccessor, timeweightinterpolatedaverageaccessor, timeweightinterpolatedintegralaccessor, accessorintegral, heartbeatagg, accessordeadranges, accessordowntime, accessorliveat, accessorliveranges, accessoruptime, heartbeatinterpolateaccessor, heartbeatinterpolateddowntimeaccessor, heartbeatinterpolateduptimeaccessor, stateagg, accessordurationin, accessordurationinint, accessordurationinrange, accessordurationinrangeint, accessorinterpolateddurationin, accessorinterpolateddurationinint, accessorinterpolatedstateinttimeline, accessorinterpolatedstateperiods, accessorinterpolatedstateperiodsint, accessorinterpolatedstatetimeline, accessorintointvalues, accessorintovalues, accessorstateat, accessorstateatint, accessorstateinttimeline, accessorstateperiods, accessorstateperiodsint, accessorstatetimeline, } "1.14.0" => { candlestick, accessorclose, accessorclosetime, accessorhigh, accessorhightime, accessorlow, accessorlowtime, accessoropen, accessoropentime, } "1.11.0" => { accessorfirsttime, accessorfirstval, accessorlasttime, accessorlastval, } "1.9.0" => { accessorapproxpercentile, accessorapproxpercentilerank, accessoraverage, accessoraveragex, accessoraveragey, accessorcorr, accessorcounterzerotime, accessorcovar, accessordelta, accessordeterminationcoeff, accessordistinctcount, 
// NOTE(review): the "1.9.0" accessor-type list continues directly from the line
// above; the line below also closes `types_stabilized_at!` and opens
// `crate::operators_stabilized_at!` (the "->" accessor-operator records), whose
// invocation continues past this span.
accessorerror, accessorextrapolateddelta, accessorextrapolatedrate, accessorideltaleft, accessorideltaright, accessorintercept, accessorirateleft, accessorirateright, accessorkurtosis, accessorkurtosisx, accessorkurtosisy, accessormaxval, accessormean, accessorminval, accessornumchanges, accessornumelements, accessornumresets, accessornumvals, accessorrate, accessorskewness, accessorskewnessx, accessorskewnessy, accessorslope, accessorstddev, accessorstddevx, accessorstddevy, accessorstderror, accessorsum, accessorsumx, accessorsumy, accessortimedelta, accessorunnest, accessorvariance, accessorvariancex, accessorvariancey, accessorwithbounds, accessorxintercept, timevector_tstz_f64, } "1.8.0" => { } "1.7.0" => { } "1.6.0" => { } "1.5" => { } "prehistory" => { uddsketch, tdigest, timeweightsummary, countersummary, hyperloglog, statssummary1d, statssummary2d, } } crate::operators_stabilized_at! { STABLE_OPERATORS "1.16.0" => { "->"(heartbeatagg,accessornumgaps), "->"(heartbeatagg,accessornumliveranges), "->"(heartbeatagg,heartbeattrimtoaccessor), "->"(uddsketch,accessorpercentilearray), "->"(maxfloats,accessorintoarray), "->"(maxfloats,accessorintovalues), "->"(maxints,accessorintoarray), "->"(maxints,accessorintovalues), "->"(maxtimes,accessorintoarray), "->"(maxtimes,accessorintovalues), "->"(minfloats,accessorintoarray), "->"(minfloats,accessorintovalues), "->"(minints,accessorintoarray), "->"(minints,accessorintovalues), "->"(mintimes,accessorintoarray), "->"(mintimes,accessorintovalues), "->"(spacesavingbigintaggregate,accessorintovalues), "->"(spacesavingbigintaggregate,accessormaxfrequencyint), "->"(spacesavingbigintaggregate,accessorminfrequencyint), "->"(spacesavingbigintaggregate,accessortopn), "->"(spacesavingbigintaggregate,accessortopncount), "->"(spacesavingtextaggregate,accessorintovalues), "->"(spacesavingtextaggregate,accessortopn), "->"(spacesavingtextaggregate,accessortopncount), } "1.15.0" => { 
"->"(countersummary,counterinterpolateddeltaaccessor), "->"(countersummary,counterinterpolatedrateaccessor), "->"(timeweightsummary,timeweightinterpolatedaverageaccessor), "->"(timeweightsummary,timeweightinterpolatedintegralaccessor), "->"(timeweightsummary,accessorintegral), "->"(heartbeatagg,accessordeadranges), "->"(heartbeatagg,accessordowntime), "->"(heartbeatagg,accessorliveat), "->"(heartbeatagg,accessorliveranges), "->"(heartbeatagg,accessoruptime), "->"(heartbeatagg,heartbeatinterpolateaccessor), "->"(heartbeatagg,heartbeatinterpolateddowntimeaccessor), "->"(heartbeatagg,heartbeatinterpolateduptimeaccessor), "->"(stateagg,accessordurationin), "->"(stateagg,accessordurationinint), "->"(stateagg,accessordurationinrange), "->"(stateagg,accessordurationinrangeint), "->"(stateagg,accessorinterpolateddurationin), "->"(stateagg,accessorinterpolateddurationinint), "->"(stateagg,accessorinterpolatedstateinttimeline), "->"(stateagg,accessorinterpolatedstateperiods), "->"(stateagg,accessorinterpolatedstateperiodsint), "->"(stateagg,accessorinterpolatedstatetimeline), "->"(stateagg,accessorintointvalues), "->"(stateagg,accessorintovalues), "->"(stateagg,accessorstateat), "->"(stateagg,accessorstateatint), "->"(stateagg,accessorstateinttimeline), "->"(stateagg,accessorstateperiods), "->"(stateagg,accessorstateperiodsint), "->"(stateagg,accessorstatetimeline), } "1.14.0" => { "->"(candlestick,accessorclose), "->"(candlestick,accessorclosetime), "->"(candlestick,accessorhigh), "->"(candlestick,accessorhightime), "->"(candlestick,accessorlow), "->"(candlestick,accessorlowtime), "->"(candlestick,accessoropen), "->"(candlestick,accessoropentime), } "1.11.0" => { "->"(countersummary,accessorfirsttime), "->"(countersummary,accessorfirstval), "->"(countersummary,accessorlasttime), "->"(countersummary,accessorlastval), "->"(timeweightsummary,accessorfirsttime), "->"(timeweightsummary,accessorfirstval), "->"(timeweightsummary,accessorlasttime), 
"->"(timeweightsummary,accessorlastval), } "1.9.0" => { "->"(countersummary,accessorcorr), "->"(countersummary,accessorcounterzerotime), "->"(countersummary,accessordelta), "->"(countersummary,accessorextrapolateddelta), "->"(countersummary,accessorextrapolatedrate), "->"(countersummary,accessorideltaleft), "->"(countersummary,accessorideltaright), "->"(countersummary,accessorintercept), "->"(countersummary,accessorirateleft), "->"(countersummary,accessorirateright), "->"(countersummary,accessornumchanges), "->"(countersummary,accessornumelements), "->"(countersummary,accessornumresets), "->"(countersummary,accessorrate), "->"(countersummary,accessorslope), "->"(countersummary,accessortimedelta), "->"(countersummary,accessorwithbounds), "->"(hyperloglog,accessordistinctcount), "->"(hyperloglog,accessorstderror), "->"(statssummary1d,accessoraverage), "->"(statssummary1d,accessorkurtosis), "->"(statssummary1d,accessornumvals), "->"(statssummary1d,accessorskewness), "->"(statssummary1d,accessorstddev), "->"(statssummary1d,accessorsum), "->"(statssummary1d,accessorvariance), "->"(statssummary2d,accessoraveragex), "->"(statssummary2d,accessoraveragey), "->"(statssummary2d,accessorcorr), "->"(statssummary2d,accessorcovar), "->"(statssummary2d,accessordeterminationcoeff), "->"(statssummary2d,accessorintercept), "->"(statssummary2d,accessorkurtosisx), "->"(statssummary2d,accessorkurtosisy), "->"(statssummary2d,accessornumvals), "->"(statssummary2d,accessorskewnessx), "->"(statssummary2d,accessorskewnessy), "->"(statssummary2d,accessorslope), "->"(statssummary2d,accessorstddevx), "->"(statssummary2d,accessorstddevy), "->"(statssummary2d,accessorsumx), "->"(statssummary2d,accessorsumy), "->"(statssummary2d,accessorvariancex), "->"(statssummary2d,accessorvariancey), "->"(statssummary2d,accessorxintercept), "->"(tdigest,accessorapproxpercentile), "->"(tdigest,accessorapproxpercentilerank), "->"(tdigest,accessormaxval), "->"(tdigest,accessormean), "->"(tdigest,accessorminval), 
"->"(tdigest,accessornumvals), "->"(timevector_tstz_f64,accessorunnest), "->"(timeweightsummary,accessoraverage), "->"(uddsketch,accessorapproxpercentile), "->"(uddsketch,accessorapproxpercentilerank), "->"(uddsketch,accessorerror), "->"(uddsketch,accessormean), "->"(uddsketch,accessornumvals), } "1.8.0" => { } "1.7.0" => { } "1.6.0" => { } "1.5" => { } "prehistory" => { } } ================================================ FILE: extension/src/stabilization_tests.rs ================================================ #[cfg(any(test, feature = "pg_test"))] use pgrx::*; #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use std::collections::HashSet; use pgrx::*; use pgrx_macros::pg_test; // Test that any new features are added to the the experimental schema #[pg_test] fn test_schema_qualification() { Spi::connect_mut(|client| { let stable_functions: HashSet = stable_functions(); let stable_types: HashSet = stable_types(); let stable_operators: HashSet = stable_operators(); let unexpected_features: Vec<_> = client .update( "SELECT pg_catalog.pg_describe_object(classid, objid, 0) \ FROM pg_catalog.pg_extension e, pg_catalog.pg_depend d \ WHERE e.extname='timescaledb_toolkit' \ AND refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass \ AND d.refobjid = e.oid \ AND deptype = 'e' ORDER BY 1", None, &[], ) .unwrap() .filter_map(|row| { let val: String = row .get_datum_by_ordinal(1) .unwrap() .value() .unwrap() .unwrap(); if let Some(schema) = val.strip_prefix("schema ") { // the only schemas we should define are // `toolkit_experimental` our experimental schema, and // `tests` which contains our pgrx-style regression tests // (including the function currently running) match schema { "toolkit_experimental" => return None, "tests" => return None, _ => return Some(val), } } if let Some(ty) = val.strip_prefix("type ") { // types in the experimental schema are experimental if ty.starts_with("toolkit_experimental.") { return None; } // PG17 started automatically 
creating an array type for types, so we need // to take those into account. let ty_no_array = ty.replace("[]", ""); if stable_types.contains(ty) || stable_types.contains(&ty_no_array) { return None; } return Some(val); } if let Some(function) = val.strip_prefix("function ") { // functions in the experimental schema are experimental if function.starts_with("toolkit_experimental.") { return None; } // functions in test schema only exist for tests and // won't be in release versions of the extension if function.starts_with("tests.") { return None; } // arrow functions outside the experimental schema are // considered experimental as long as one of their argument // types are experimental (`#[pg_operator]` doesn't allow // us to declare these in a schema and the operator using // them not in the schema). We use name-based resolution // to tell if a function exists to implement an arrow // operator because we didn't have a better method let is_arrow = function.starts_with("arrow_") || function.starts_with("finalize_with"); if is_arrow && function.contains("toolkit_experimental.") { return None; } if stable_functions.contains(function) { return None; } // Hack to fix the function macro's inability to handle [] in the type double precision[]. if function == "approx_percentile_array(double precision[],uddsketch)" || function == "approx_percentiles(double precision[])" { return None; } return Some(val); } if let Some(operator) = val.strip_prefix("operator ") { // we generally don't put operators in the experimental // schema if we can avoid it because we consider the // `OPERATOR(schema.)` syntax to be to much a // usability hazard. Instead we rely on one of the input // types being experimental and the cascading nature of // drop. 
This means that we consider an operator // unstable if either of its arguments or the operator // itself are in the experimental schema if operator.contains("toolkit_experimental.") { return None; } if stable_operators.contains(operator) { return None; } return Some(val); } if let Some(cast) = val.strip_prefix("cast ") { // casts cannot be schema-qualified, so we rely on one // of the types involved being experimental and the // cascading nature of drop. This means that we consider // a cast unstable if and only if one of the types // involved are in the experimental schema if cast.contains("toolkit_experimental.") { return None; } return Some(val); } Some(val) }) .collect(); if unexpected_features.is_empty() { return; } panic!("unexpectedly released features: {unexpected_features:#?}") }); } fn stable_functions() -> HashSet { crate::stabilization_info::STABLE_FUNCTIONS() } fn stable_types() -> HashSet { crate::stabilization_info::STABLE_TYPES() } fn stable_operators() -> HashSet { crate::stabilization_info::STABLE_OPERATORS() } } #[macro_export] macro_rules! functions_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($fn_name: ident ( $( $($fn_type: ident)+ ),* ) ),* $(,)? } )* ) => { #[cfg(any(test, feature = "pg_test"))] #[allow(non_snake_case)] // we do this instead of just stringifying everything b/c stringify adds // whitespace in places we don't want pub fn $export_symbol() -> std::collections::HashSet { static FUNCTIONS: &[(&str, &[&str])] = &[ $( $( ( stringify!($fn_name), &[ $( stringify!($($fn_type)+) ),* ] ), )* )* ]; FUNCTIONS.iter().map(|(name, types)| { format!("{}({})", name, types.join(",")) }).collect() } }; } #[macro_export] macro_rules! types_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($type_name: ident),* $(,)? 
} )* ) => { #[cfg(any(test, feature = "pg_test"))] #[allow(non_snake_case)] // we do this instead of just stringifying everything b/c stringify adds // whitespace in places we don't want pub fn $export_symbol() -> std::collections::HashSet { pub static TYPES: &[&str] = &[ $( $(stringify!($type_name),)* )* ]; TYPES.iter().map(|s| s.to_ascii_lowercase()).collect() } }; } #[macro_export] macro_rules! operators_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($operator_name: literal ( $( $($fn_type: ident)+ ),* ) ),* $(,)? } )* ) => { #[cfg(any(test, feature = "pg_test"))] #[allow(non_snake_case)] pub fn $export_symbol() -> std::collections::HashSet { static OPERATORS: &[(&str, &[&str])] = &[ $( $( ( $operator_name, &[ $( stringify!($($fn_type)+) ),* ] ), )* )* ]; OPERATORS.iter().map(|(name, types)| { format!("{}({})", name, types.join(",")) }).collect() } }; } ================================================ FILE: extension/src/state_aggregate/accessors.rs ================================================ use crate::{ datum_utils::interval_to_ms, pg_type, raw::{Interval, TimestampTz}, ron_inout_funcs, state_aggregate::*, }; pg_type! { struct AccessorInterpolatedStateTimeline<'input> { start: i64, interval: i64, prev: StateAggData<'input>, prev_present: bool, } } ron_inout_funcs!(AccessorInterpolatedStateTimeline<'input>); #[pg_extern(immutable, parallel_safe, name = "interpolated_state_timeline")] fn accessor_state_agg_interpolated_interpolated_state_timeline<'a>( start: TimestampTz, interval: Interval, prev: Option>, ) -> AccessorInterpolatedStateTimeline<'a> { crate::build! { AccessorInterpolatedStateTimeline { interval: interval_to_ms(&start, &interval), start: start.into(), prev_present: prev.is_some(), prev: prev.unwrap_or_else(|| StateAgg::empty(false)).0, } } } pg_type! 
{ struct AccessorInterpolatedStateIntTimeline<'input> { start: i64, interval: i64, prev: StateAggData<'input>, prev_present: bool, } } ron_inout_funcs!(AccessorInterpolatedStateIntTimeline<'input>); #[pg_extern(immutable, parallel_safe, name = "interpolated_state_int_timeline")] fn accessor_state_agg_interpolated_interpolated_state_int_timeline<'a>( start: TimestampTz, interval: Interval, prev: Option>, ) -> AccessorInterpolatedStateIntTimeline<'a> { crate::build! { AccessorInterpolatedStateIntTimeline { interval: interval_to_ms(&start, &interval), start: start.into(), prev_present: prev.is_some(), prev: prev.unwrap_or_else(|| StateAgg::empty(false)).0, } } } // weird ordering is needed for alignment pg_type! { struct AccessorInterpolatedDurationIn<'input> { start: i64, interval: i64, state_len: u32, padding_2: [u8; 4], prev: StateAggData<'input>, state_bytes: [u8; self.state_len], prev_present: bool, } } ron_inout_funcs!(AccessorInterpolatedDurationIn<'input>); pg_type! { struct AccessorInterpolatedDurationInInt<'input> { start: i64, interval: i64, state: i64, prev_present: bool, padding_2: [u8; 7], prev: StateAggData<'input>, } } ron_inout_funcs!(AccessorInterpolatedDurationInInt<'input>); #[pg_extern(immutable, parallel_safe, name = "interpolated_duration_in")] fn accessor_state_agg_interpolated_interpolated_duration_in<'a>( state: String, start: TimestampTz, interval: Interval, prev: Option>, ) -> AccessorInterpolatedDurationIn<'a> { crate::build! 
{ AccessorInterpolatedDurationIn { state_len: state.len().try_into().unwrap(), state_bytes: state.into_bytes().into(), interval: interval_to_ms(&start, &interval), start: start.into(), prev_present: prev.is_some(), prev: prev.unwrap_or_else(|| StateAgg::empty(false)).0, padding_2: Default::default(), } } } #[pg_extern(immutable, parallel_safe, name = "interpolated_duration_in")] fn accessor_state_agg_interpolated_interpolated_duration_in_int<'a>( state: i64, start: TimestampTz, interval: Interval, prev: Option>, ) -> AccessorInterpolatedDurationInInt<'a> { crate::build! { AccessorInterpolatedDurationInInt { state, interval: interval_to_ms(&start, &interval), start: start.into(), prev_present: prev.is_some(), prev: prev.unwrap_or_else(|| StateAgg::empty(false)).0, padding_2: Default::default(), } } } // weird ordering is needed for alignment pg_type! { struct AccessorInterpolatedStatePeriods<'input> { start: i64, interval: i64, state_len: u32, padding_2: [u8; 4], prev: StateAggData<'input>, state_bytes: [u8; self.state_len], prev_present: bool, } } ron_inout_funcs!(AccessorInterpolatedStatePeriods<'input>); pg_type! { struct AccessorInterpolatedStatePeriodsInt<'input> { start: i64, interval: i64, state: i64, prev_present: bool, padding_2: [u8; 7], prev: StateAggData<'input>, } } ron_inout_funcs!(AccessorInterpolatedStatePeriodsInt<'input>); #[pg_extern(immutable, parallel_safe, name = "interpolated_state_periods")] fn accessor_state_agg_interpolated_interpolated_state_periods<'a>( state: String, start: TimestampTz, interval: Interval, prev: Option>, ) -> AccessorInterpolatedStatePeriods<'a> { crate::build! 
{ AccessorInterpolatedStatePeriods { state_len: state.len().try_into().unwrap(), state_bytes: state.into_bytes().into(), interval: interval_to_ms(&start, &interval), start: start.into(), prev_present: prev.is_some(), prev: prev.unwrap_or_else(|| StateAgg::empty(false)).0, padding_2: Default::default(), } } } #[pg_extern(immutable, parallel_safe, name = "interpolated_state_periods")] fn accessor_state_agg_interpolated_interpolated_state_periods_int<'a>( state: i64, start: TimestampTz, interval: Interval, prev: Option>, ) -> AccessorInterpolatedStatePeriodsInt<'a> { crate::build! { AccessorInterpolatedStatePeriodsInt { state, interval: interval_to_ms(&start, &interval), start: start.into(), prev_present: prev.is_some(), prev: prev.unwrap_or_else(|| StateAgg::empty(false)).0, padding_2: Default::default(), } } } pg_type! { struct AccessorDurationIn<'input> { state_len: u32, state_bytes: [u8; self.state_len], } } ron_inout_funcs!(AccessorDurationIn<'input>); pg_type! { struct AccessorDurationInInt { state: i64, } } ron_inout_funcs!(AccessorDurationInInt); #[pg_extern(immutable, parallel_safe, name = "duration_in")] fn accessor_state_agg_duration_in(state: String) -> AccessorDurationIn<'static> { crate::build! { AccessorDurationIn { state_len: state.len().try_into().unwrap(), state_bytes: state.into_bytes().into(), } } } #[pg_extern(immutable, parallel_safe, name = "duration_in")] fn accessor_state_agg_duration_in_int(state: i64) -> AccessorDurationInInt { crate::build! { AccessorDurationInInt { state, } } } pg_type! { struct AccessorStatePeriods<'input> { state_len: u32, state_bytes: [u8; self.state_len], } } ron_inout_funcs!(AccessorStatePeriods<'input>); pg_type! { struct AccessorStatePeriodsInt { state: i64, } } ron_inout_funcs!(AccessorStatePeriodsInt); #[pg_extern(immutable, parallel_safe, name = "state_periods")] fn accessor_state_agg_state_periods<'a>(state: String) -> AccessorStatePeriods<'static> { crate::build! 
{ AccessorStatePeriods { state_len: state.len().try_into().unwrap(), state_bytes: state.into_bytes().into(), } } } #[pg_extern(immutable, parallel_safe, name = "state_periods")] fn accessor_state_agg_state_periods_int(state: i64) -> AccessorStatePeriodsInt { crate::build! { AccessorStatePeriodsInt { state, } } } pg_type! { struct AccessorDurationInRange<'input> { state_len: u32, padding_2: [u8; 4], start: i64, interval: i64, state_bytes: [u8; self.state_len], } } ron_inout_funcs!(AccessorDurationInRange<'input>); pg_type! { struct AccessorDurationInRangeInt { state: i64, start: i64, interval: i64, } } ron_inout_funcs!(AccessorDurationInRangeInt); #[pg_extern(immutable, parallel_safe, name = "duration_in")] fn accessor_state_agg_duration_in_range( state: String, start: TimestampTz, interval: default!(Option, "NULL"), ) -> AccessorDurationInRange<'static> { let interval = interval .map(|interval| crate::datum_utils::interval_to_ms(&start, &interval)) .unwrap_or(NO_INTERVAL_MARKER); let start = start.into(); crate::build! { AccessorDurationInRange { state_len: state.len().try_into().unwrap(), state_bytes: state.into_bytes().into(), padding_2: [0; 4], start, interval } } } #[pg_extern(immutable, parallel_safe, name = "duration_in")] fn accessor_state_agg_duration_in_range_int( state: i64, start: TimestampTz, interval: default!(Option, "NULL"), ) -> AccessorDurationInRangeInt { let interval = interval .map(|interval| crate::datum_utils::interval_to_ms(&start, &interval)) .unwrap_or(NO_INTERVAL_MARKER); let start = start.into(); crate::build! { AccessorDurationInRangeInt { state, start, interval } } } pg_type! { struct AccessorStateAt { time: i64, } } ron_inout_funcs!(AccessorStateAt); #[pg_extern(immutable, parallel_safe, name = "state_at")] fn accessor_state_agg_state_at(time: TimestampTz) -> AccessorStateAt { crate::build! { AccessorStateAt { time: time.into(), } } } pg_type! 
{ struct AccessorStateAtInt { time: i64, } } ron_inout_funcs!(AccessorStateAtInt); #[pg_extern(immutable, parallel_safe, name = "state_at_int")] fn accessor_state_agg_state_at_int(time: TimestampTz) -> AccessorStateAtInt { crate::build! { AccessorStateAtInt { time: time.into(), } } } ================================================ FILE: extension/src/state_aggregate/rollup.rs ================================================ use super::{toolkit_experimental::*, *}; use crate::{ aggregate_utils::in_aggregate_context, palloc::{InternalAsValue, ToInternal}, }; use serde::{Deserialize, Serialize}; extension_sql!( "CREATE AGGREGATE toolkit_experimental.rollup( value toolkit_experimental.CompactStateAgg ) ( sfunc = toolkit_experimental.compact_state_agg_rollup_trans, stype = internal, finalfunc = toolkit_experimental.compact_state_agg_rollup_final, combinefunc = state_agg_rollup_combine, serialfunc = state_agg_rollup_serialize, deserialfunc = state_agg_rollup_deserialize, parallel = restricted );", name = "compact_state_agg_rollup", requires = [ compact_state_agg_rollup_trans, compact_state_agg_rollup_final, state_agg_rollup_combine, state_agg_rollup_serialize, state_agg_rollup_deserialize, CompactStateAgg, ], ); extension_sql!( "CREATE AGGREGATE rollup( value StateAgg ) ( sfunc = state_agg_rollup_trans, stype = internal, finalfunc = state_agg_rollup_final, combinefunc = state_agg_rollup_combine, serialfunc = state_agg_rollup_serialize, deserialfunc = state_agg_rollup_deserialize, parallel = restricted );", name = "state_agg_rollup", requires = [ state_agg_rollup_trans, state_agg_rollup_final, state_agg_rollup_combine, state_agg_rollup_serialize, state_agg_rollup_deserialize, StateAgg, ], ); #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RollupTransState { values: Vec, compact: bool, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] struct OwnedCompactStateAgg { durations: Vec, combined_durations: Vec, first_time: i64, last_time: i64, 
first_state: u32, last_state: u32, states: Vec, compact: bool, integer_states: bool, } impl OwnedCompactStateAgg { pub fn merge(self, other: Self) -> Self { assert_eq!( self.compact, other.compact, "can't merge compact_state_agg and state_agg" ); assert_eq!( self.integer_states, other.integer_states, "can't merge aggs with different state types" ); let (earlier, later) = match self.cmp(&other) { Ordering::Less => (self, other), Ordering::Greater => (other, self), Ordering::Equal => panic!( "can't merge overlapping aggregates (same start time: {})", self.first_time ), }; assert!( earlier.last_time <= later.first_time, "can't merge overlapping aggregates (earlier={}-{}, later={}-{})", earlier.first_time, earlier.last_time, later.first_time, later.last_time, ); assert_ne!( later.durations.len(), 0, "later aggregate must be non-empty" ); assert_ne!( earlier.durations.len(), 0, "earlier aggregate must be non-empty" ); let later_states = String::from_utf8(later.states.to_vec()).expect("invalid later UTF-8 states"); let mut merged_states = String::from_utf8(earlier.states.to_vec()).expect("invalid earlier UTF-8 states"); let mut merged_durations = earlier.durations.into_iter().collect::>(); let earlier_len = earlier.combined_durations.len(); let mut merged_last_state = None; for (later_idx, dis) in later.durations.iter().enumerate() { let materialized_dis = dis.state.materialize(&later_states); let merged_duration_info = merged_durations .iter_mut() .enumerate() .find(|(_, merged_dis)| { merged_dis.state.materialize(&merged_states) == materialized_dis }); let merged_idx = if let Some((merged_idx, merged_duration_to_update)) = merged_duration_info { merged_duration_to_update.duration += dis.duration; merged_idx } else { let state = materialized_dis.entry(&mut merged_states); merged_durations.push(DurationInState { state, duration: dis.duration, }); merged_durations.len() - 1 }; if later_idx == later.last_state as usize { // this is the last state merged_last_state = 
Some(merged_idx); }; } let merged_last_state = merged_last_state.expect("later last_state not in later.durations") as u32; let mut combined_durations = earlier .combined_durations .into_iter() .chain(later.combined_durations.into_iter().map(|tis| { let state = tis .state .materialize(&later_states) .existing_entry(&merged_states); TimeInState { state, ..tis } })) .collect::>(); let gap = later.first_time - earlier.last_time; assert!(gap >= 0); merged_durations .get_mut(earlier.last_state as usize) .expect("earlier.last_state doesn't point to a state") .duration += gap; // ensure combined_durations covers the whole range of time if !earlier.compact { if combined_durations .get_mut(earlier_len - 1) .expect("invalid combined_durations: nothing at end of earlier") .state .materialize(&merged_states) == combined_durations .get(earlier_len) .expect("invalid combined_durations: nothing at start of earlier") .state .materialize(&merged_states) { combined_durations .get_mut(earlier_len - 1) .expect("invalid combined_durations (nothing at earlier_len - 1, equal)") .end_time = combined_durations.remove(earlier_len).end_time; } else { combined_durations .get_mut(earlier_len - 1) .expect("invalid combined_durations (nothing at earlier_len - 1, not equal)") .end_time = combined_durations .get(earlier_len) .expect("invalid combined_durations (nothing at earlier_len, not equal)") .start_time; } } let merged_states = merged_states.into_bytes(); OwnedCompactStateAgg { states: merged_states, durations: merged_durations, combined_durations, first_time: earlier.first_time, last_time: later.last_time, first_state: earlier.first_state, // indexes into earlier durations are same for merged_durations last_state: merged_last_state, // these values are always the same for both compact: earlier.compact, integer_states: earlier.integer_states, } } } impl<'a> From for CompactStateAgg<'a> { fn from(owned: OwnedCompactStateAgg) -> CompactStateAgg<'a> { unsafe { flatten!(CompactStateAgg { 
states_len: owned.states.len() as u64, states: (&*owned.states).into(), durations_len: owned.durations.len() as u64, durations: (&*owned.durations).into(), combined_durations: (&*owned.combined_durations).into(), combined_durations_len: owned.combined_durations.len() as u64, first_time: owned.first_time, last_time: owned.last_time, first_state: owned.first_state, last_state: owned.last_state, compact: owned.compact, integer_states: owned.integer_states, }) } } } impl<'a> From> for OwnedCompactStateAgg { fn from(agg: CompactStateAgg<'a>) -> OwnedCompactStateAgg { OwnedCompactStateAgg { states: agg.states.iter().collect::>(), durations: agg.durations.iter().collect::>(), combined_durations: agg.combined_durations.iter().collect::>(), first_time: agg.first_time, last_time: agg.last_time, first_state: agg.first_state, last_state: agg.last_state, compact: agg.compact, integer_states: agg.integer_states, } } } impl PartialOrd for OwnedCompactStateAgg { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for OwnedCompactStateAgg { fn cmp(&self, other: &Self) -> Ordering { // compare using first time (OwnedCompactStateAgg::merge will handle any overlap) self.first_time.cmp(&other.first_time) } } impl RollupTransState { fn merge(&mut self) { // OwnedCompactStateAgg::merge can't merge overlapping aggregates self.values.sort(); self.values = self .values .drain(..) 
.reduce(|a, b| a.merge(b)) .map(|val| vec![val]) .unwrap_or_else(Vec::new); } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn compact_state_agg_rollup_trans( state: Internal, next: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { compact_state_agg_rollup_trans_inner(unsafe { state.to_inner() }, next, fcinfo).internal() } pub fn compact_state_agg_rollup_trans_inner( state: Option>, next: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state, next) { (None, None) => None, (None, Some(next)) => Some( RollupTransState { values: vec![next.into()], compact: false, } .into(), ), (Some(state), None) => Some(state), (Some(mut state), Some(next)) => { state.values.push(next.into()); Some(state) } }) } } #[pg_extern(immutable, parallel_safe)] pub fn state_agg_rollup_trans( state: Internal, next: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { compact_state_agg_rollup_trans_inner( unsafe { state.to_inner() }, next.map(StateAgg::as_compact_state_agg), fcinfo, ) .internal() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] fn compact_state_agg_rollup_final( state: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { compact_state_agg_rollup_final_inner(unsafe { state.to_inner() }, fcinfo) } fn compact_state_agg_rollup_final_inner( state: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let mut state = match state { None => return None, Some(state) => state.clone(), }; state.merge(); assert!(state.values.len() == 1); let agg: Option = state.values.drain(..).next().unwrap().into(); agg.map(Into::into) }) } } #[pg_extern(immutable, parallel_safe)] fn state_agg_rollup_final( state: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { state_agg_rollup_final_inner(unsafe { state.to_inner() }, fcinfo) } fn state_agg_rollup_final_inner( state: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> 
Option> { unsafe { in_aggregate_context(fcinfo, || { let mut state = match state { None => return None, Some(state) => state.clone(), }; state.merge(); assert!(state.values.len() == 1); let agg: Option = state.values.drain(..).next().unwrap().into(); agg.map(Into::into).map(StateAgg::new) }) } } #[pg_extern(immutable, parallel_safe, strict)] pub fn state_agg_rollup_serialize(state: Internal) -> bytea { let mut state: Inner = unsafe { state.to_inner().unwrap() }; state.merge(); crate::do_serialize!(state) } #[pg_extern(strict, immutable, parallel_safe)] pub fn state_agg_rollup_deserialize(bytes: bytea, _internal: Internal) -> Option { state_agg_rollup_deserialize_inner(bytes).internal() } pub fn state_agg_rollup_deserialize_inner(bytes: bytea) -> Inner { let t: RollupTransState = crate::do_deserialize!(bytes, RollupTransState); t.into() } #[pg_extern(immutable, parallel_safe)] pub fn state_agg_rollup_combine( state1: Internal, state2: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { unsafe { state_agg_rollup_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() } } #[allow(clippy::redundant_clone)] // clone is needed so we don't mutate shared memory pub fn state_agg_rollup_combine_inner( state1: Option>, state2: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state1, state2) { (None, None) => None, (Some(x), None) => Some(x.clone().into()), (None, Some(x)) => Some(x.clone().into()), (Some(x), Some(y)) => { let compact = x.compact; assert_eq!( compact, y.compact, "trying to merge compact and non-compact state aggs, this should be unreachable" ); let values = x .values .iter() .chain(y.values.iter()) .map(Clone::clone) .collect::>(); let trans_state = RollupTransState { values, compact }; Some(trans_state.clone().into()) } }) } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; #[pg_test] #[should_panic = "can't merge overlapping 
aggregates"] fn merge_range_full_overlap() { let mut outer: OwnedCompactStateAgg = CompactStateAgg::empty(false, false).into(); outer.first_time = 10; outer.last_time = 50; let mut inner: OwnedCompactStateAgg = CompactStateAgg::empty(false, false).into(); inner.first_time = 20; inner.last_time = 30; inner.merge(outer); } #[pg_test] #[should_panic = "can't merge overlapping aggregates"] fn merge_range_partial_overlap() { let mut r1: OwnedCompactStateAgg = CompactStateAgg::empty(false, false).into(); r1.first_time = 10; r1.last_time = 50; let mut r2: OwnedCompactStateAgg = CompactStateAgg::empty(false, false).into(); r2.first_time = 20; r2.last_time = 50; r2.merge(r1); } #[test] fn merges_compact_aggs_correctly() { let s1 = OwnedCompactStateAgg { durations: vec![ DurationInState { duration: 500, state: StateEntry::from_integer(5_552), }, DurationInState { duration: 400, state: StateEntry::from_integer(5_551), }, ], combined_durations: vec![], first_time: 100, last_time: 1000, first_state: 1, last_state: 0, states: vec![], compact: true, integer_states: true, }; let s2 = OwnedCompactStateAgg { durations: vec![ DurationInState { duration: 500, state: StateEntry::from_integer(5_552), }, DurationInState { duration: 400, state: StateEntry::from_integer(5_551), }, ], combined_durations: vec![], first_time: 1000 + 12345, last_time: 1900 + 12345, first_state: 1, last_state: 0, states: vec![], compact: true, integer_states: true, }; let s3 = OwnedCompactStateAgg { durations: vec![ DurationInState { duration: 500, state: StateEntry::from_integer(5_552), }, DurationInState { duration: 400, state: StateEntry::from_integer(5_551), }, ], combined_durations: vec![], first_time: 1900 + 12345, last_time: 1900 + 12345 + 900, first_state: 1, last_state: 0, states: vec![], compact: true, integer_states: true, }; let expected = OwnedCompactStateAgg { durations: vec![ DurationInState { duration: 500 * 3 + 12345, state: StateEntry::from_integer(5_552), }, DurationInState { duration: 400 * 
3,
                    state: StateEntry::from_integer(5_551),
                },
            ],
            combined_durations: vec![],
            first_time: 100,
            last_time: 1900 + 12345 + 900,
            first_state: 1,
            last_state: 0,
            states: vec![],
            compact: true,
            integer_states: true,
        };

        // merge must be associative and order-insensitive for disjoint ranges
        let merged = s1.clone().merge(s2.clone().merge(s3.clone()));
        assert_eq!(merged, expected);
        let merged = s3.clone().merge(s2.clone().merge(s1.clone()));
        assert_eq!(merged, expected);

        let mut trans_state = RollupTransState {
            values: vec![s1.clone(), s2.clone(), s3.clone()],
            compact: true,
        };
        trans_state.merge();
        assert_eq!(trans_state.values.len(), 1);
        assert_eq!(trans_state.values[0], expected.clone());

        let mut trans_state = RollupTransState {
            values: vec![s3.clone(), s1.clone(), s2.clone()],
            compact: true,
        };
        trans_state.merge();
        assert_eq!(trans_state.values.len(), 1);
        assert_eq!(trans_state.values[0], expected.clone());
    }
}
================================================ FILE: extension/src/state_aggregate.rs ================================================
//! SELECT duration_in('STOPPED', states) as run_time, duration_in('ERROR', states) as error_time FROM (
//! SELECT compact_state_agg(time, state) as states FROM ...
//! );
//!
//! Currently requires loading all data into memory in order to sort it by time.

#![allow(non_camel_case_types)]

use pgrx::{iter::TableIterator, *};
use serde::{Deserialize, Serialize};
use std::cmp::Ordering;

use aggregate_builder::aggregate;
use flat_serialize::*;
use flat_serialize_macro::FlatSerializable;

use crate::{
    accessors::{
        AccessorIntoIntValues, AccessorIntoValues, AccessorStateIntTimeline, AccessorStateTimeline,
    },
    flatten,
    palloc::{Inner, Internal},
    pg_type,
    raw::{bytea, TimestampTz},
    ron_inout_funcs,
};
use toolkit_experimental::{CompactStateAgg, CompactStateAggData};

mod accessors;
use accessors::*;

pub mod rollup;

/// The data of a state.
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize)]
#[repr(C)]
enum MaterializedState {
    String(String),
    Integer(i64),
}

impl MaterializedState {
    /// Converts to a `StateEntry`, interning the string into `states` if needed.
    fn entry(&self, states: &mut String) -> StateEntry {
        match self {
            Self::Integer(i) => StateEntry { a: i64::MAX, b: *i },
            Self::String(s) => StateEntry::from_str(states, s),
        }
    }

    /// Converts to a `StateEntry`; panics (via `from_existing_str`) when the
    /// string is not already present in `states`.
    fn existing_entry(&self, states: &str) -> StateEntry {
        match self {
            Self::Integer(i) => StateEntry { a: i64::MAX, b: *i },
            Self::String(s) => StateEntry::from_existing_str(states, s),
        }
    }

    fn into_string(self) -> String {
        match self {
            Self::String(str) => str,
            _ => panic!("MaterializedState::into_string called with non-string"),
        }
    }

    fn into_integer(self) -> i64 {
        match self {
            Self::Integer(int) => int,
            _ => panic!("MaterializedState::into_integer called with non-integer"),
        }
    }
}

/// A stored state entry. Needs a `states` string to be interpreted.
#[derive(
    Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, FlatSerializable, Serialize, Deserialize,
)]
#[repr(C)]
pub struct StateEntry {
    // a == i64::MAX tags an integer state (value in b); otherwise (a, b) are
    // byte offsets into the interned `states` string
    a: i64,
    b: i64,
}

impl StateEntry {
    #[cfg(test)] // only used by tests
    fn from_integer(int: i64) -> Self {
        Self {
            a: i64::MAX,
            b: int,
        }
    }

    /// Interns `new_state` into `states` (reusing an existing occurrence when
    /// one is found) and returns the entry pointing at it.
    fn from_str(states: &mut String, new_state: &str) -> Self {
        let (a, b) = if let Some(bounds) = states
            .find(new_state)
            .map(|idx| (idx as i64, (idx + new_state.len()) as i64))
        {
            bounds
        } else {
            let bounds = (states.len() as i64, (states.len() + new_state.len()) as i64);
            states.push_str(new_state);
            bounds
        };
        Self { a, b }
    }

    fn from_existing_str(states: &str, state: &str) -> Self {
        if let Some(val) = Self::try_from_existing_str(states, state) {
            val
        } else {
            panic!("Tried to get state that doesn't exist: {state}")
        }
    }

    fn try_from_existing_str(states: &str, state: &str) -> Option<Self> {
        states
            .find(state)
            .map(|idx| (idx as i64, (idx + state.len()) as i64))
            .map(|bounds| Self {
                a: bounds.0,
                b: bounds.1,
            })
    }

    fn materialize(&self, states: &str) -> MaterializedState {
        if self.a == i64::MAX {
MaterializedState::Integer(self.b) } else { MaterializedState::String( states .get(self.a as usize..self.b as usize) .expect("tried to materialize out-of-bounds state") .to_string(), ) } } fn as_str(self, states: &str) -> &str { assert!(self.a != i64::MAX, "Tried to get non-string state"); states .get(self.a as usize..self.b as usize) .expect("tried to stringify out-of-bounds state") } fn into_integer(self) -> i64 { assert!(self.a == i64::MAX, "Tried to get non-integer state"); self.b } } #[pg_schema] pub mod toolkit_experimental { use super::*; pg_type! { #[derive(Debug)] struct CompactStateAgg<'input> { states_len: u64, // TODO JOSH this and durations_len can be 32 durations_len: u64, durations: [DurationInState; self.durations_len], combined_durations_len: u64, combined_durations: [TimeInState; self.combined_durations_len], first_time: i64, last_time: i64, first_state: u32, last_state: u32, // first/last state are idx into durations, keep together for alignment states: [u8; self.states_len], compact: bool, integer_states: bool, } } impl CompactStateAgg<'_> { pub(super) fn empty(compact: bool, integer_states: bool) -> Self { unsafe { flatten!(CompactStateAgg { states_len: 0, states: Slice::Slice(&[]), durations_len: 0, durations: Slice::Slice(&[]), combined_durations: Slice::Slice(&[]), combined_durations_len: 0, first_time: 0, last_time: 0, first_state: 0, last_state: 0, compact, integer_states, }) } } pub(super) fn new( states: String, durations: Vec, first: Option, last: Option, combined_durations: Option>, integer_states: bool, ) -> Self { let compact = combined_durations.is_none(); if durations.is_empty() { assert!( first.is_none() && last.is_none() && states.is_empty() && combined_durations.map(|v| v.is_empty()).unwrap_or(true) ); return Self::empty(compact, integer_states); } assert!(first.is_some() && last.is_some()); let first = first.unwrap(); let last = last.unwrap(); let states_len = states.len() as u64; let durations_len = durations.len() as u64; let 
mut first_state = durations.len();
            let mut last_state = durations.len();
            // Find first and last state
            for (i, d) in durations.iter().enumerate() {
                let s = d.state.materialize(&states);
                if s == first.state {
                    first_state = i;
                    if last_state < durations.len() {
                        break;
                    }
                }
                if s == last.state {
                    last_state = i;
                    if first_state < durations.len() {
                        break;
                    }
                }
            }
            assert!(first_state < durations.len() && last_state < durations.len());

            let combined_durations = combined_durations.unwrap_or_default();

            unsafe {
                flatten!(CompactStateAgg {
                    states_len,
                    states: states.into_bytes().into(),
                    durations_len,
                    durations: (&*durations).into(),
                    combined_durations: (&*combined_durations).into(),
                    combined_durations_len: combined_durations.len() as u64,
                    first_time: first.time,
                    last_time: last.time,
                    first_state: first_state as u32,
                    last_state: last_state as u32,
                    compact,
                    integer_states,
                })
            }
        }

        /// Total duration recorded for `state`, if any.
        pub fn get(&self, state: StateEntry) -> Option<i64> {
            self.get_materialized(&state.materialize(self.states_as_str()))
        }

        pub(super) fn get_materialized(&self, state: &MaterializedState) -> Option<i64> {
            for record in self.durations.iter() {
                if record.state.materialize(self.states_as_str()) == *state {
                    return Some(record.duration);
                }
            }
            None
        }

        pub(super) fn states_as_str(&self) -> &str {
            let states: &[u8] = self.states.as_slice();
            // SAFETY: came from a String in `new` a few lines up
            unsafe { std::str::from_utf8_unchecked(states) }
        }

        /// Extend this aggregate to cover `[interval_start, interval_start +
        /// interval_len)`, borrowing the leading state from `prev` when the
        /// interval starts before our first recorded time.
        pub(super) fn interpolate(
            &self,
            interval_start: i64,
            interval_len: i64,
            prev: Option<CompactStateAgg>,
        ) -> CompactStateAgg<'_> {
            if self.durations.is_empty() {
                pgrx::error!("unable to interpolate interval on state aggregate with no data");
            }
            if let Some(ref prev) = prev {
                assert_eq!(
                    prev.integer_states, self.integer_states,
                    "can't interpolate between aggs with different state types"
                );
            }

            let mut states = std::str::from_utf8(self.states.as_slice())
                .unwrap()
                .to_string();
            let mut durations: Vec<DurationInState> = self.durations.iter().collect();
            let mut combined_durations = if self.compact {
                None
            } else {
                Some(self.combined_durations.iter().collect::<Vec<_>>())
            };

            let first = match prev {
                Some(prev) if interval_start < self.first_time => {
                    if prev.last_state < prev.durations.len() as u32 {
                        let start_interval = self.first_time - interval_start;
                        let start_state = &prev.durations.as_slice()[prev.last_state as usize]
                            .state
                            .materialize(prev.states_as_str());

                        // update durations
                        let state = match durations
                            .iter_mut()
                            .find(|x| x.state.materialize(&states) == *start_state)
                        {
                            Some(dis) => {
                                dis.duration += start_interval;
                                dis.state
                            }
                            None => {
                                let state = start_state.entry(&mut states);
                                durations.push(DurationInState {
                                    duration: start_interval,
                                    state,
                                });
                                state
                            }
                        };

                        // update combined_durations
                        if let Some(combined_durations) = combined_durations.as_mut() {
                            // extend last duration
                            let first_cd = combined_durations
                                .first_mut()
                                .expect("poorly formed StateAgg, length mismatch");
                            let first_cd_state = first_cd.state.materialize(&states);
                            if first_cd_state == *start_state {
                                first_cd.start_time -= start_interval;
                            } else {
                                combined_durations.insert(
                                    0,
                                    TimeInState {
                                        start_time: interval_start,
                                        end_time: self.first_time,
                                        state,
                                    },
                                );
                            };
                        };

                        Record {
                            state: start_state.clone(),
                            time: interval_start,
                        }
                    } else {
                        pgrx::error!("unable to interpolate interval on state aggregate where previous agg has no data")
                    }
                }
                _ => Record {
                    state: self.durations.as_slice()[self.first_state as usize]
                        .state
                        .materialize(&states),
                    time: self.first_time,
                },
            };

            let last = if interval_start + interval_len > self.last_time {
                let last_interval = interval_start + interval_len - self.last_time;
                match durations.get_mut(self.last_state as usize) {
                    None => {
                        pgrx::error!("poorly formed state aggregate, last_state out of starts")
                    }
                    Some(dis) => {
                        dis.duration += last_interval;
                        if let Some(combined_durations) = combined_durations.as_mut() {
                            // extend last duration
                            combined_durations
                                .last_mut()
                                .expect("poorly formed state aggregate, length mismatch")
                                .end_time += last_interval;
                        };
                        Record {
                            state:
dis.state.materialize(&states),
                            time: interval_start + interval_len,
                        }
                    }
                }
            } else {
                Record {
                    state: self.durations.as_slice()[self.last_state as usize]
                        .state
                        .materialize(&states),
                    time: self.last_time,
                }
            };

            CompactStateAgg::new(
                states,
                durations,
                Some(first),
                Some(last),
                combined_durations,
                self.integer_states,
            )
        }

        pub fn assert_int<'a>(&self) {
            assert!(
                self.0.integer_states,
                "Expected integer state, found string state"
            );
        }

        pub fn assert_str<'a>(&self) {
            assert!(
                !self.0.integer_states,
                "Expected string state, found integer state"
            );
        }
    }

    ron_inout_funcs!(CompactStateAgg<'input>);
}

pg_type! {
    #[derive(Debug)]
    struct StateAgg<'input> {
        compact_state_agg: CompactStateAggData<'input>,
    }
}

impl<'input> StateAgg<'input> {
    pub fn new(compact_state_agg: CompactStateAgg) -> Self {
        unsafe {
            flatten!(StateAgg {
                compact_state_agg: compact_state_agg.0,
            })
        }
    }

    pub fn empty(integer_states: bool) -> Self {
        Self::new(CompactStateAgg::empty(false, integer_states))
    }

    pub fn as_compact_state_agg(self) -> toolkit_experimental::CompactStateAgg<'static> {
        unsafe { self.0.compact_state_agg.flatten() }
    }

    pub fn assert_int<'a>(&self) {
        assert!(
            self.0.compact_state_agg.integer_states,
            "State must have integer values for this function"
        );
    }

    pub fn assert_str<'a>(&self) {
        assert!(
            !self.0.compact_state_agg.integer_states,
            "State must have string values for this function"
        );
    }
}

ron_inout_funcs!(StateAgg<'input>);

/// Shared transition step for all state_agg/compact_state_agg variants:
/// a NULL value leaves the state unchanged; otherwise the (time, value)
/// observation is appended to the transient record list.
fn state_trans_inner(
    state: Option<CompactStateAggTransState>,
    ts: TimestampTz,
    value: Option<MaterializedState>,
    integer_states: bool,
) -> Option<CompactStateAggTransState> {
    let value = match value {
        None => return state,
        Some(value) => value,
    };
    let mut state = state.unwrap_or_else(|| CompactStateAggTransState::new(integer_states));
    state.record(value, ts.into());
    Some(state)
}

#[aggregate]
impl toolkit_experimental::compact_state_agg {
    type State = CompactStateAggTransState;
    const PARALLEL_SAFE: bool = true;

    fn transition(
        state: Option<State>,
        #[sql_type("timestamptz")] ts: TimestampTz,
        #[sql_type("text")] value: Option<String>,
    ) -> Option<State> {
        state_trans_inner(state, ts, value.map(MaterializedState::String), false)
    }

    fn combine(a: Option<&State>, b: Option<&State>) -> Option<State> {
        match (a, b) {
            (None, None) => None,
            (None, Some(only)) | (Some(only), None) => Some(only.clone()),
            (Some(a), Some(b)) => {
                let (mut a, mut b) = (a.clone(), b.clone());
                a.append(&mut b);
                Some(a)
            }
        }
    }

    fn serialize(state: &mut State) -> bytea {
        crate::do_serialize!(state)
    }

    fn deserialize(bytes: bytea) -> State {
        crate::do_deserialize!(bytes, CompactStateAggTransState)
    }

    fn finally(state: Option<&mut State>) -> Option<CompactStateAgg<'static>> {
        state.map(|s| {
            let mut states = String::new();
            let mut durations: Vec<DurationInState> = vec![];
            let (map, first, last) = s.make_duration_map_and_bounds();
            for (state, duration) in map {
                durations.push(DurationInState {
                    duration,
                    state: state.entry(&mut states),
                });
            }
            CompactStateAgg::new(states, durations, first, last, None, s.integer_states)
        })
    }
}

extension_sql!(
    "CREATE AGGREGATE toolkit_experimental.compact_state_agg(
        ts timestamptz,
        value bigint
    ) (
        stype = internal,
        sfunc = toolkit_experimental.compact_state_agg_int_trans,
        finalfunc = toolkit_experimental.compact_state_agg_finally_fn_outer,
        parallel = safe,
        serialfunc = toolkit_experimental.compact_state_agg_serialize_fn_outer,
        deserialfunc = toolkit_experimental.compact_state_agg_deserialize_fn_outer,
        combinefunc = toolkit_experimental.compact_state_agg_combine_fn_outer
    );",
    name = "compact_state_agg_bigint",
    requires = [
        compact_state_agg_int_trans,
        compact_state_agg_finally_fn_outer,
        compact_state_agg_serialize_fn_outer,
        compact_state_agg_deserialize_fn_outer,
        compact_state_agg_combine_fn_outer
    ],
);

/// Transition function for the bigint-valued compact_state_agg overload.
#[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")]
fn compact_state_agg_int_trans(
    __inner: pgrx::Internal,
    ts: TimestampTz,
    value: Option<i64>,
    __fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    // expanded from #[aggregate] transition function
    use crate::palloc::{Inner, InternalAsValue, ToInternal};
    type State = CompactStateAggTransState;
    unsafe {
        let mut
__inner: Option<Inner<Option<State>>> = __inner.to_inner();
        let inner: Option<State> = match &mut __inner {
            None => None,
            Some(inner) => Option::take(&mut **inner),
        };
        let state: Option<State> = inner;
        crate::aggregate_utils::in_aggregate_context(__fcinfo, || {
            let result = state_trans_inner(state, ts, value.map(MaterializedState::Integer), true);
            let state: Option<State> = result;
            __inner = match (__inner, state) {
                (None, None) => None,
                (None, state @ Some(..)) => Some(state.into()),
                (Some(mut inner), state) => {
                    *inner = state;
                    Some(inner)
                }
            };
            __inner.internal()
        })
    }
}

#[aggregate]
impl state_agg {
    type State = CompactStateAggTransState;
    const PARALLEL_SAFE: bool = true;

    fn transition(
        state: Option<State>,
        #[sql_type("timestamptz")] ts: TimestampTz,
        #[sql_type("text")] value: Option<String>,
    ) -> Option<State> {
        compact_state_agg::transition(state, ts, value)
    }

    fn combine(a: Option<&State>, b: Option<&State>) -> Option<State> {
        compact_state_agg::combine(a, b)
    }

    fn serialize(state: &mut State) -> bytea {
        compact_state_agg::serialize(state)
    }

    fn deserialize(bytes: bytea) -> State {
        compact_state_agg::deserialize(bytes)
    }

    fn finally(state: Option<&mut State>) -> Option<StateAgg<'static>> {
        state.map(|s| {
            let mut states = String::new();
            let mut durations: Vec<DurationInState> = vec![];
            let (map, first, last) = s.make_duration_map_and_bounds();
            for (state, duration) in map {
                let state = state.entry(&mut states);
                durations.push(DurationInState { duration, state });
            }

            // Collapse the sorted records into contiguous periods: a new
            // period starts whenever the observed state changes.
            let mut merged_durations: Vec<TimeInState> = Vec::new();
            let mut last_record_state = None;
            for record in s.records.drain(..) {
                if last_record_state
                    .clone()
                    .map(|last| last != record.state)
                    .unwrap_or(true)
                {
                    if let Some(prev) = merged_durations.last_mut() {
                        prev.end_time = record.time;
                    }
                    merged_durations.push(TimeInState {
                        start_time: record.time,
                        end_time: 0,
                        state: record.state.entry(&mut states),
                    });
                    last_record_state = Some(record.state);
                }
            }
            if let Some(last_time_in_state) = merged_durations.last_mut() {
                last_time_in_state.end_time = last.as_ref().unwrap().time;
            }

            StateAgg::new(CompactStateAgg::new(
                states,
                durations,
                first,
                last,
                Some(merged_durations),
                s.integer_states,
            ))
        })
    }
}

extension_sql!(
    "CREATE AGGREGATE state_agg(
        ts timestamptz,
        value bigint
    ) (
        stype = internal,
        sfunc = state_agg_int_trans,
        finalfunc = state_agg_finally_fn_outer,
        parallel = safe,
        serialfunc = state_agg_serialize_fn_outer,
        deserialfunc = state_agg_deserialize_fn_outer,
        combinefunc = state_agg_combine_fn_outer
    );",
    name = "state_agg_bigint",
    requires = [
        state_agg_int_trans,
        state_agg_finally_fn_outer,
        state_agg_serialize_fn_outer,
        state_agg_deserialize_fn_outer,
        state_agg_combine_fn_outer
    ],
);

/// Transition function for the bigint-valued state_agg overload.
#[pg_extern(immutable, parallel_safe)]
fn state_agg_int_trans(
    __inner: pgrx::Internal,
    ts: TimestampTz,
    value: Option<i64>,
    __fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    // expanded from #[aggregate] transition function
    use crate::palloc::{Inner, InternalAsValue, ToInternal};
    type State = CompactStateAggTransState;
    unsafe {
        let mut __inner: Option<Inner<Option<State>>> = __inner.to_inner();
        let inner: Option<State> = match &mut __inner {
            None => None,
            Some(inner) => Option::take(&mut **inner),
        };
        let state: Option<State> = inner;
        crate::aggregate_utils::in_aggregate_context(__fcinfo, || {
            let result = state_trans_inner(state, ts, value.map(MaterializedState::Integer), true);
            let state: Option<State> = result;
            __inner = match (__inner, state) {
                (None, None) => None,
                (None, state @ Some(..)) => Some(state.into()),
                (Some(mut inner), state) => {
                    *inner = state;
                    Some(inner)
                }
            };
            __inner.internal()
        })
    }
}

// Intermediate state kept in postgres.
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)] pub struct CompactStateAggTransState { records: Vec, integer_states: bool, } impl CompactStateAggTransState { fn new(integer_states: bool) -> Self { Self { records: vec![], integer_states, } } fn record(&mut self, state: MaterializedState, time: i64) { self.records.push(Record { state, time }); } fn append(&mut self, other: &mut Self) { self.records.append(&mut other.records) } fn sort_records(&mut self) { self.records.sort_by(|a, b| { if a.time == b.time { // TODO JOSH do we care about instantaneous state changes? // an alternative is to drop duplicate timestamps if a.state != b.state { // TODO use human-readable timestamp panic!( "state cannot be both {:?} and {:?} at {}", a.state, b.state, a.time ) } std::cmp::Ordering::Equal } else { a.time.cmp(&b.time) } }); } /// Use accumulated state, sort, and return tuple of map of states to durations along with first and last record. fn make_duration_map_and_bounds( &mut self, ) -> ( std::collections::HashMap, Option, Option, ) { self.sort_records(); let (first, last) = (self.records.first(), self.records.last()); let first = first.cloned(); let last = last.cloned(); let mut duration_state = DurationState::new(); for record in &self.records { duration_state.handle_record(record.state.clone(), record.time); } duration_state.finalize(); // TODO BRIAN sort this by decreasing duration will make it easier to implement a TopN states (duration_state.durations, first, last) } } fn duration_in_inner<'a>( aggregate: Option>, state: MaterializedState, range: Option<(i64, Option)>, // start and interval ) -> crate::raw::Interval { let time: i64 = if let Some((start, interval)) = range { let end = if let Some(interval) = interval { assert!(interval >= 0, "Interval must not be negative"); start + interval } else { i64::MAX }; assert!(end >= start, "End time must be after start time"); if let Some(agg) = aggregate { assert!( !agg.0.compact, "unreachable: interval specified 
for compact aggregate" ); let mut total = 0; for tis in agg.combined_durations.iter() { let tis_start_time = i64::max(tis.start_time, start); let tis_end_time = i64::min(tis.end_time, end); if tis_start_time > end { // combined_durations is sorted, so after this point there can't be any more break; }; if tis_end_time >= start && tis.state.materialize(agg.states_as_str()) == state { let amount = tis_end_time - tis_start_time; assert!(amount >= 0, "incorrectly ordered times"); total += amount; } } total } else { 0 } } else { aggregate .and_then(|aggregate| aggregate.get_materialized(&state)) .unwrap_or(0) }; time.into() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn duration_in<'a>(agg: Option>, state: String) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_str() }; let state = MaterializedState::String(state); duration_in_inner(agg, state, None) } #[pg_extern( immutable, parallel_safe, name = "duration_in", schema = "toolkit_experimental" )] pub fn duration_in_int<'a>(agg: Option>, state: i64) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_int() }; duration_in_inner(agg, MaterializedState::Integer(state), None) } #[pg_extern(immutable, parallel_safe, name = "duration_in")] pub fn duration_in_tl<'a>(agg: Option>, state: String) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_str() }; duration_in(agg.map(StateAgg::as_compact_state_agg), state) } #[pg_extern(immutable, parallel_safe, name = "duration_in")] pub fn duration_in_tl_int<'a>(agg: Option>, state: i64) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_int() }; duration_in_inner( agg.map(StateAgg::as_compact_state_agg), MaterializedState::Integer(state), None, ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_duration_in_string<'a>( agg: StateAgg<'a>, accessor: AccessorDurationIn, ) -> crate::raw::Interval { let state = MaterializedState::String( 
String::from_utf8_lossy(accessor.state_bytes.as_slice()).to_string(), ); duration_in_inner(Some(agg.as_compact_state_agg()), state, None) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_duration_in_int<'a>( agg: StateAgg<'a>, accessor: AccessorDurationInInt, ) -> crate::raw::Interval { let state = MaterializedState::Integer(accessor.state); duration_in_inner(Some(agg.as_compact_state_agg()), state, None) } #[pg_extern(immutable, parallel_safe, name = "duration_in")] pub fn duration_in_range<'a>( agg: Option>, state: String, start: TimestampTz, interval: default!(Option, "NULL"), ) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_str() }; let agg = agg.map(StateAgg::as_compact_state_agg); let interval = interval.map(|interval| crate::datum_utils::interval_to_ms(&start, &interval)); let start = start.into(); duration_in_inner( agg, MaterializedState::String(state), Some((start, interval)), ) } #[pg_extern(immutable, parallel_safe, name = "duration_in")] pub fn duration_in_range_int<'a>( agg: Option>, state: i64, start: TimestampTz, interval: default!(Option, "NULL"), ) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_int() }; let interval = interval.map(|interval| crate::datum_utils::interval_to_ms(&start, &interval)); let start = start.into(); duration_in_inner( agg.map(StateAgg::as_compact_state_agg), MaterializedState::Integer(state), Some((start, interval)), ) } /// Used to indicate no interval was specified. The interval cannot be negative anyways, so this /// value will never be a valid argument. 
const NO_INTERVAL_MARKER: i64 = i64::MIN; fn range_tuple(start: i64, interval: i64) -> (i64, Option) { ( start, if interval == NO_INTERVAL_MARKER { None } else { Some(interval) }, ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_duration_in_range_string<'a>( agg: StateAgg<'a>, accessor: AccessorDurationInRange, ) -> crate::raw::Interval { let state = MaterializedState::String( String::from_utf8_lossy(accessor.state_bytes.as_slice()).to_string(), ); duration_in_inner( Some(agg.as_compact_state_agg()), state, Some(range_tuple(accessor.start, accessor.interval)), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_duration_in_range_int<'a>( agg: StateAgg<'a>, accessor: AccessorDurationInRangeInt, ) -> crate::raw::Interval { let state = MaterializedState::Integer(accessor.state); duration_in_inner( Some(agg.as_compact_state_agg()), state, Some(range_tuple(accessor.start, accessor.interval)), ) } fn interpolated_duration_in_inner<'a>( aggregate: Option>, state: MaterializedState, start: i64, interval: i64, prev: Option>, ) -> crate::raw::Interval { match aggregate { None => pgrx::error!( "when interpolating data between grouped data, all groups must contain some data" ), Some(aggregate) => { if let Some(ref prev) = prev { assert!( start >= prev.0.last_time, "Start time cannot be before last state of previous aggregate" ); }; let range = if aggregate.compact { assert!( start <= aggregate.first_time, "For compact state aggregates, the start cannot be after the first state" ); assert!( (start + interval) >= aggregate.last_time, "For compact state aggregates, the time range cannot be after the last state" ); None } else { Some((start, Some(interval))) }; let new_agg = aggregate.interpolate(start, interval, prev); duration_in_inner(Some(new_agg), state, range) } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn interpolated_duration_in<'a>( agg: Option>, state: String, start: 
TimestampTz, interval: crate::raw::Interval, prev: Option>, ) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_str() }; let interval = crate::datum_utils::interval_to_ms(&start, &interval); interpolated_duration_in_inner( agg, MaterializedState::String(state), start.into(), interval, prev, ) } #[pg_extern(immutable, parallel_safe, name = "interpolated_duration_in")] pub fn interpolated_duration_in_tl<'a>( agg: Option>, state: String, start: TimestampTz, interval: crate::raw::Interval, prev: Option>, ) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_str() }; interpolated_duration_in( agg.map(StateAgg::as_compact_state_agg), state, start, interval, prev.map(StateAgg::as_compact_state_agg), ) } #[pg_extern( immutable, parallel_safe, schema = "toolkit_experimental", name = "interpolated_duration_in" )] pub fn interpolated_duration_in_int<'a>( agg: Option>, state: i64, start: TimestampTz, interval: crate::raw::Interval, prev: Option>, ) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_int() }; let interval = crate::datum_utils::interval_to_ms(&start, &interval); interpolated_duration_in_inner( agg, MaterializedState::Integer(state), start.into(), interval, prev, ) } #[pg_extern(immutable, parallel_safe, name = "interpolated_duration_in")] pub fn interpolated_duration_in_tl_int<'a>( agg: Option>, state: i64, start: TimestampTz, interval: crate::raw::Interval, prev: Option>, ) -> crate::raw::Interval { if let Some(ref agg) = agg { agg.assert_int() }; interpolated_duration_in_int( agg.map(StateAgg::as_compact_state_agg), state, start, interval, prev.map(StateAgg::as_compact_state_agg), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_interpolated_duration_in_string<'a>( agg: Option>, accessor: AccessorInterpolatedDurationIn, ) -> crate::raw::Interval { let state = MaterializedState::String( String::from_utf8_lossy(accessor.state_bytes.as_slice()).to_string(), ); 
interpolated_duration_in_inner( agg.map(StateAgg::as_compact_state_agg), state, accessor.start, accessor.interval, if accessor.prev_present { Some(unsafe { accessor.prev.flatten() }.as_compact_state_agg()) } else { None }, ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_interpolated_duration_in_int<'a>( agg: Option>, accessor: AccessorInterpolatedDurationInInt, ) -> crate::raw::Interval { let state = MaterializedState::Integer(accessor.state); interpolated_duration_in_inner( agg.map(StateAgg::as_compact_state_agg), state, accessor.start, accessor.interval, if accessor.prev_present { Some(unsafe { accessor.prev.flatten() }.as_compact_state_agg()) } else { None }, ) } fn duration_in_bad_args_inner() -> ! { panic!("The start and interval parameters cannot be used for duration_in with a compact state aggregate") } #[allow(unused_variables)] // can't underscore-prefix since argument names are used by pgrx #[pg_extern( immutable, parallel_safe, name = "duration_in", schema = "toolkit_experimental" )] pub fn duration_in_bad_args<'a>( agg: Option>, state: String, start: TimestampTz, interval: crate::raw::Interval, ) -> crate::raw::Interval { duration_in_bad_args_inner() } #[allow(unused_variables)] // can't underscore-prefix since argument names are used by pgrx #[pg_extern( immutable, parallel_safe, name = "duration_in", schema = "toolkit_experimental" )] pub fn duration_in_int_bad_args<'a>( agg: Option>, state: i64, start: TimestampTz, interval: crate::raw::Interval, ) -> crate::raw::Interval { duration_in_bad_args_inner() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn into_values<'a>( agg: CompactStateAgg<'a>, ) -> TableIterator< 'a, ( pgrx::name!(state, String), pgrx::name!(duration, crate::raw::Interval), ), > { agg.assert_str(); let states: String = agg.states_as_str().to_owned(); TableIterator::new(agg.durations.clone().into_iter().map(move |record| { ( record.state.as_str(&states).to_string(), 
record.duration.into(),
        )
    }))
}

#[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")]
pub fn into_int_values<'a>(
    agg: CompactStateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(duration, crate::raw::Interval),
    ),
> {
    agg.assert_int();
    TableIterator::new(
        agg.durations
            .clone()
            .into_iter()
            .map(move |record| (record.state.into_integer(), record.duration.into()))
            .collect::<Vec<_>>()
            .into_iter(), // make map panic now instead of at iteration time
    )
}

#[pg_extern(immutable, parallel_safe, name = "into_values")]
pub fn into_values_tl<'a>(
    agg: StateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(duration, crate::raw::Interval),
    ),
> {
    agg.assert_str();
    into_values(agg.as_compact_state_agg())
}

#[pg_extern(immutable, parallel_safe, name = "into_int_values")]
pub fn into_values_tl_int<'a>(
    agg: StateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(duration, crate::raw::Interval),
    ),
> {
    agg.assert_int();
    into_int_values(agg.as_compact_state_agg())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_into_values<'a>(
    agg: StateAgg<'a>,
    _accessor: AccessorIntoValues,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(duration, crate::raw::Interval),
    ),
> {
    into_values_tl(agg)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_into_int_values<'a>(
    agg: StateAgg<'a>,
    _accessor: AccessorIntoIntValues,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(duration, crate::raw::Interval),
    ),
> {
    into_values_tl_int(agg)
}

/// Emit (state, start, end) rows from a non-compact aggregate's timeline.
fn state_timeline_inner<'a>(
    agg: CompactStateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    assert!(
        !agg.compact,
        "state_timeline can only be called on a compact_state_agg built from state_agg"
    );
    let states: String = agg.states_as_str().to_owned();
    TableIterator::new(
        agg.combined_durations
            .clone()
            .into_iter()
            .map(move |record| {
                (
                    record.state.as_str(&states).to_string(),
                    TimestampTz::from(record.start_time),
                    TimestampTz::from(record.end_time),
                )
            }),
    )
}

/// Integer-state counterpart of `state_timeline_inner`.
fn state_int_timeline_inner<'a>(
    agg: CompactStateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    assert!(
        !agg.compact,
        "state_timeline can only be called on a compact_state_agg built from state_agg"
    );
    TableIterator::new(
        agg.combined_durations
            .clone()
            .into_iter()
            .map(move |record| {
                (
                    record.state.into_integer(),
                    TimestampTz::from(record.start_time),
                    TimestampTz::from(record.end_time),
                )
            })
            .collect::<Vec<_>>()
            .into_iter(), // make map panic now instead of at iteration time
    )
}

#[pg_extern(immutable, parallel_safe)]
pub fn state_timeline<'a>(
    agg: StateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    agg.assert_str();
    state_timeline_inner(agg.as_compact_state_agg())
}

#[pg_extern(immutable, parallel_safe)]
pub fn state_int_timeline<'a>(
    agg: StateAgg<'a>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    agg.assert_int();
    state_int_timeline_inner(agg.as_compact_state_agg())
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_state_timeline<'a>(
    agg: StateAgg<'a>,
    _accessor: AccessorStateTimeline,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    state_timeline(agg)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_state_int_timeline<'a>(
    agg: StateAgg<'a>,
    _accessor: AccessorStateIntTimeline,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    state_int_timeline(agg)
}

fn interpolated_state_timeline_inner<'a>(
agg: Option<StateAgg<'a>>,
    start: i64,
    interval: i64,
    prev: Option<StateAgg<'a>>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    if let Some(ref agg) = agg {
        agg.assert_str()
    };
    match agg {
        None => pgrx::error!(
            "when interpolating data between grouped data, all groups must contain some data"
        ),
        Some(agg) => TableIterator::new(
            state_timeline_inner(agg.as_compact_state_agg().interpolate(
                start,
                interval,
                prev.map(StateAgg::as_compact_state_agg),
            ))
            .collect::<Vec<_>>()
            .into_iter(),
        ),
    }
}

/// Integer-state counterpart of `interpolated_state_timeline_inner`.
fn interpolated_state_int_timeline_inner<'a>(
    agg: Option<StateAgg<'a>>,
    start: i64,
    interval: i64,
    prev: Option<StateAgg<'a>>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    if let Some(ref agg) = agg {
        agg.assert_int()
    };
    match agg {
        None => pgrx::error!(
            "when interpolating data between grouped data, all groups must contain some data"
        ),
        Some(agg) => TableIterator::new(
            state_int_timeline_inner(agg.as_compact_state_agg().interpolate(
                start,
                interval,
                prev.map(StateAgg::as_compact_state_agg),
            ))
            .collect::<Vec<_>>()
            .into_iter(),
        ),
    }
}

#[pg_extern(immutable, parallel_safe)]
pub fn interpolated_state_timeline<'a>(
    agg: Option<StateAgg<'a>>,
    start: TimestampTz,
    interval: crate::raw::Interval,
    prev: Option<StateAgg<'a>>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    let interval = crate::datum_utils::interval_to_ms(&start, &interval);
    interpolated_state_timeline_inner(agg, start.into(), interval, prev)
}

#[pg_extern(immutable, parallel_safe)]
pub fn interpolated_state_int_timeline<'a>(
    agg: Option<StateAgg<'a>>,
    start: TimestampTz,
    interval: crate::raw::Interval,
    prev: Option<StateAgg<'a>>,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    let interval = crate::datum_utils::interval_to_ms(&start, &interval);
    interpolated_state_int_timeline_inner(agg, start.into(), interval, prev)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_interpolated_state_timeline<'a>(
    agg: Option<StateAgg<'a>>,
    accessor: AccessorInterpolatedStateTimeline,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, String),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    interpolated_state_timeline_inner(
        agg,
        accessor.start,
        accessor.interval,
        if accessor.prev_present {
            Some(unsafe { accessor.prev.flatten() })
        } else {
            None
        },
    )
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_interpolated_state_int_timeline<'a>(
    agg: Option<StateAgg<'a>>,
    accessor: AccessorInterpolatedStateIntTimeline,
) -> TableIterator<
    'a,
    (
        pgrx::name!(state, i64),
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    interpolated_state_int_timeline_inner(
        agg,
        accessor.start,
        accessor.interval,
        if accessor.prev_present {
            Some(unsafe { accessor.prev.flatten() })
        } else {
            None
        },
    )
}

/// Emit (start, end) rows for every period spent in `state`.
fn state_periods_inner<'a>(
    agg: CompactStateAgg<'a>,
    state: MaterializedState,
) -> TableIterator<
    'a,
    (
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    assert!(
        !agg.compact,
        "state_periods can only be called on a compact_state_agg built from state_agg"
    );
    let states: String = agg.states_as_str().to_owned();
    TableIterator::new(
        agg.combined_durations
            .clone()
            .into_iter()
            .filter_map(move |record| {
                if record.state.materialize(&states) == state {
                    Some((
                        TimestampTz::from(record.start_time),
                        TimestampTz::from(record.end_time),
                    ))
                } else {
                    None
                }
            }),
    )
}

#[pg_extern(immutable, parallel_safe)]
pub fn state_periods<'a>(
    agg: StateAgg<'a>,
    state: String,
) -> TableIterator<
    'a,
    (
        pgrx::name!(start_time, TimestampTz),
        pgrx::name!(end_time, TimestampTz),
    ),
> {
    agg.assert_str();
    let agg = agg.as_compact_state_agg();
    state_periods_inner(agg, MaterializedState::String(state))
}

#[pg_extern(immutable, parallel_safe, name = "state_periods")]
pub fn state_int_periods<'a>(
    agg:
StateAgg<'a>, state: i64, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { agg.assert_int(); state_periods_inner( agg.as_compact_state_agg(), MaterializedState::Integer(state), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_state_periods_string<'a>( agg: StateAgg<'a>, accessor: AccessorStatePeriods, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { let state = MaterializedState::String( String::from_utf8_lossy(accessor.state_bytes.as_slice()).to_string(), ); state_periods_inner(agg.as_compact_state_agg(), state) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_state_periods_int<'a>( agg: StateAgg<'a>, accessor: AccessorStatePeriodsInt, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { let state = MaterializedState::Integer(accessor.state); state_periods_inner(agg.as_compact_state_agg(), state) } fn interpolated_state_periods_inner<'a>( aggregate: Option>, state: MaterializedState, start: i64, interval: i64, prev: Option>, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { match aggregate { None => pgrx::error!( "when interpolating data between grouped data, all groups must contain some data" ), Some(aggregate) => TableIterator::new( state_periods_inner(aggregate.interpolate(start, interval, prev), state) .collect::>() .into_iter(), ), } } #[pg_extern(immutable, parallel_safe)] pub fn interpolated_state_periods<'a>( agg: Option>, state: String, start: TimestampTz, interval: crate::raw::Interval, prev: Option>, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { if let Some(ref agg) = agg { agg.assert_str() }; let interval = crate::datum_utils::interval_to_ms(&start, &interval); interpolated_state_periods_inner( 
agg.map(StateAgg::as_compact_state_agg), MaterializedState::String(state), start.into(), interval, prev.map(StateAgg::as_compact_state_agg), ) } #[pg_extern(immutable, parallel_safe, name = "interpolated_state_periods")] pub fn interpolated_state_periods_int<'a>( agg: Option>, state: i64, start: TimestampTz, interval: crate::raw::Interval, prev: Option>, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { if let Some(ref agg) = agg { agg.assert_int() }; let interval = crate::datum_utils::interval_to_ms(&start, &interval); interpolated_state_periods_inner( agg.map(StateAgg::as_compact_state_agg), MaterializedState::Integer(state), start.into(), interval, prev.map(StateAgg::as_compact_state_agg), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_interpolated_state_periods_string<'a>( agg: Option>, accessor: AccessorInterpolatedStatePeriods, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { let state = MaterializedState::String( String::from_utf8_lossy(accessor.state_bytes.as_slice()).to_string(), ); interpolated_state_periods_inner( agg.map(StateAgg::as_compact_state_agg), state, accessor.start, accessor.interval, if accessor.prev_present { Some(unsafe { accessor.prev.flatten() }.as_compact_state_agg()) } else { None }, ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_state_agg_interpolated_state_periods_int<'a>( agg: Option>, accessor: AccessorInterpolatedStatePeriodsInt, ) -> TableIterator< 'a, ( pgrx::name!(start_time, TimestampTz), pgrx::name!(end_time, TimestampTz), ), > { let state = MaterializedState::Integer(accessor.state); interpolated_state_periods_inner( agg.map(StateAgg::as_compact_state_agg), state, accessor.start, accessor.interval, if accessor.prev_present { Some(unsafe { accessor.prev.flatten() }.as_compact_state_agg()) } else { None }, ) } fn state_at_inner<'a>(agg: StateAgg<'a>, 
point: i64) -> Option<MaterializedState> {
    // `state_at_inner` returns the state the aggregate was in at `point`
    // (microseconds since the PG epoch), or `None` when the aggregate is
    // empty or `point` precedes the first recorded transition.
    let agg = agg.as_compact_state_agg();
    if agg.combined_durations.is_empty() {
        return None;
    }
    // Binary search for the last period starting at or before `point`:
    // an exact hit is that period; otherwise `Err(idx)` is the insertion
    // point, so the covering period is the one just before it.
    let slice = agg.combined_durations.as_slice();
    let idx = match slice.binary_search_by(|tis| tis.start_time.cmp(&point)) {
        Ok(idx) => idx,
        Err(idx) => idx.checked_sub(1)?, // return NULL if before first item
    };
    let tis = slice.get(idx).expect("binary search index out-of-bounds");
    Some(tis.state.materialize(agg.states_as_str()))
}

/// `state_at` for string-state aggregates; errors if the aggregate holds
/// integer states.
#[pg_extern(immutable, parallel_safe, name = "state_at")]
fn state_at<'a>(agg: StateAgg<'a>, point: TimestampTz) -> Option<String> {
    agg.assert_str();
    state_at_inner(agg, point.into()).map(MaterializedState::into_string)
}

/// `state_at_int` for integer-state aggregates; errors if the aggregate
/// holds string states.
#[pg_extern(immutable, parallel_safe, name = "state_at_int")]
fn state_at_int<'a>(agg: StateAgg<'a>, point: TimestampTz) -> Option<i64> {
    agg.assert_int();
    state_at_inner(agg, point.into()).map(MaterializedState::into_integer)
}

/// Arrow-operator (`agg -> state_at(...)`) form of [`state_at`].
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_state_at_string<'a>(
    agg: StateAgg<'a>,
    accessor: AccessorStateAt,
) -> Option<String> {
    state_at_inner(agg, accessor.time).map(MaterializedState::into_string)
}

/// Arrow-operator (`agg -> state_at_int(...)`) form of [`state_at_int`].
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_state_agg_state_at_int<'a>(
    agg: StateAgg<'a>,
    accessor: AccessorStateAtInt,
) -> Option<i64> {
    state_at_inner(agg, accessor.time).map(MaterializedState::into_integer)
}

/// Total duration spent in one state, as serialized inside the aggregate.
#[derive(Clone, Debug, Deserialize, Eq, FlatSerializable, PartialEq, Serialize)]
#[repr(C)]
pub struct DurationInState {
    duration: i64,
    state: StateEntry,
}

/// One contiguous period spent in a state (half-open timestamp pair).
#[derive(Clone, Debug, Deserialize, Eq, FlatSerializable, PartialEq, Serialize)]
#[repr(C)]
pub struct TimeInState {
    start_time: i64,
    end_time: i64,
    state: StateEntry,
}

/// Transient accumulator used while folding ordered records into per-state
/// total durations; not serialized.
struct DurationState {
    // most recently seen (state, time) pair, pending until the next record
    // fixes its duration
    last_state: Option<(MaterializedState, i64)>,
    durations: std::collections::HashMap<MaterializedState, i64>,
}

impl DurationState {
    fn new() -> Self {
        Self {
            last_state: None,
            durations: std::collections::HashMap::new(),
        }
    }
fn handle_record(&mut self, state: MaterializedState, time: i64) { match self.last_state.take() { None => self.last_state = Some((state, time)), Some((last_state, last_time)) => { debug_assert!(time >= last_time); self.last_state = Some((state, time)); match self.durations.get_mut(&last_state) { None => { self.durations.insert(last_state, time - last_time); } Some(duration) => { let this_duration = time - last_time; let new_duration = *duration + this_duration; *duration = new_duration; } } } } } // It's possible that our last seen state was unique, in which case we'll have to // add a 0 duration entry so that we can handle rollup and interpolation calls fn finalize(&mut self) { if let Some((last_state, _)) = self.last_state.take() { self.durations.entry(last_state).or_insert(0); } } } #[derive(Serialize, Deserialize, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] struct Record { state: MaterializedState, time: i64, } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use std::sync::atomic::Ordering::Relaxed; use super::*; use pgrx_macros::pg_test; macro_rules! 
select_one { ($client:expr, $stmt:expr, $type:ty) => { $client .update($stmt, None, &[]) .unwrap() .first() .get_one::<$type>() .unwrap() .unwrap() }; } #[pg_test] #[should_panic = "The start and interval parameters cannot be used for duration_in with"] fn duration_in_misuse_error() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); assert_eq!( "365 days 00:02:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one', '2020-01-01', '1 day')::TEXT FROM test", &str ) ); }) } #[pg_test] fn one_state_one_change() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-12-31 00:02:00+00', 'end') "#, None, &[], ) .unwrap(); assert_eq!( "365 days 00:02:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "365 days 00:02:00", select_one!( client, "SELECT duration_in(state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); }); } #[pg_test] fn two_states_two_changes() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-01 00:01:00+00', 'two'), ('2020-12-31 00:02:00+00', 'end') "#, None, &[], ) .unwrap(); assert_eq!( "00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "365 days 00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); }); } #[pg_test] fn two_states_three_changes() { Spi::connect_mut(|client| { client 
.update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-01 00:01:00+00', 'two'), ('2020-01-01 00:02:00+00', 'one'), ('2020-12-31 00:02:00+00', 'end') "#, None, &[], ) .unwrap(); assert_eq!( "365 days 00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); assert_eq!( "365 days 00:01:00", select_one!( client, "SELECT duration_in(state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "00:01:00", select_one!( client, "SELECT duration_in(state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); }); } #[pg_test] fn out_of_order_times() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-01 00:02:00+00', 'one'), ('2020-01-01 00:01:00+00', 'two'), ('2020-12-31 00:02:00+00', 'end') "#, None, &[], ) .unwrap(); assert_eq!( "365 days 00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); }); } #[pg_test] fn same_state_twice() { // TODO Do we care? Could be that states are recorded not only when they change but // also at regular intervals even when they don't? 
Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-01 00:01:00+00', 'one'), ('2020-01-01 00:02:00+00', 'two'), ('2020-12-31 00:02:00+00', 'end') "#, None, &[], ) .unwrap(); assert_eq!( "00:02:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "365 days", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); }); } #[pg_test] fn duration_in_two_states_two_changes() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-01 00:01:00+00', 'two'), ('2020-12-31 00:02:00+00', 'end') "#, None, &[], ) .unwrap(); assert_eq!( "00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); assert_eq!( "365 days 00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); }); } #[pg_test] fn same_state_twice_last() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-01 00:01:00+00', 'two'), ('2020-01-01 00:02:00+00', 'two') "#, None, &[], ) .unwrap(); assert_eq!( "00:01:00", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'two')::TEXT FROM test", &str ) ); }); } #[pg_test] fn combine_using_muchos_data() { compact_state_agg::counters::reset(); Spi::connect_mut(|client| { client 
.update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client.update( r#" insert into test values ('2020-01-01 00:00:00+00', 'one'); insert into test select '2020-01-02 UTC'::timestamptz + make_interval(days=>v), 'two' from generate_series(1,300000) v; insert into test select '2020-01-02 UTC'::timestamptz + make_interval(days=>v), 'three' from generate_series(300001,600000) v; insert into test select '2020-01-02 UTC'::timestamptz + make_interval(days=>v), 'four' from generate_series(600001,900000) v; "#, None, &[], ).unwrap(); assert_eq!( "2 days", select_one!( client, "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one')::TEXT FROM test", &str ) ); }); assert!(compact_state_agg::counters::COMBINE_NONE.load(Relaxed) == 0); // TODO untested assert!(compact_state_agg::counters::COMBINE_A.load(Relaxed) == 0); // TODO untested assert!(compact_state_agg::counters::COMBINE_B.load(Relaxed) > 0); // tested assert!(compact_state_agg::counters::COMBINE_BOTH.load(Relaxed) > 0); // tested } // TODO This doesn't work under github actions. Do we run with multiple // CPUs there? If not, that would surely make a big difference. 
// TODO use EXPLAIN to figure out how it differs when run under github actions // #[pg_test] #[allow(dead_code)] fn combine_using_settings() { compact_state_agg::counters::reset(); Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'one'), ('2020-01-03 00:00:00+00', 'two') "#, None, &[], ) .unwrap(); assert_eq!( "2 days", select_one!( client, r#" SET parallel_setup_cost = 0; SET parallel_tuple_cost = 0; SET min_parallel_table_scan_size = 0; SET max_parallel_workers_per_gather = 4; SET parallel_leader_participation = off; SET enable_indexonlyscan = off; SELECT toolkit_experimental.duration_in('one', toolkit_experimental.compact_state_agg(ts, state))::TEXT FROM ( SELECT * FROM test UNION ALL SELECT * FROM test UNION ALL SELECT * FROM test UNION ALL SELECT * FROM test) u "#, &str ) ); }); assert!(compact_state_agg::counters::COMBINE_NONE.load(Relaxed) == 0); // TODO untested assert!(compact_state_agg::counters::COMBINE_A.load(Relaxed) == 0); // TODO untested assert!(compact_state_agg::counters::COMBINE_B.load(Relaxed) > 0); // tested assert!(compact_state_agg::counters::COMBINE_BOTH.load(Relaxed) > 0); // tested } // the sample query from the ticket #[pg_test] fn sample_query() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'START'), ('2020-01-01 00:01:00+00', 'ERROR'), ('2020-01-01 00:02:00+00', 'STOPPED')"#, None, &[], ) .unwrap(); assert_eq!( client .update( r#"SELECT toolkit_experimental.duration_in(states, 'ERROR')::TEXT as error, toolkit_experimental.duration_in(states, 'START')::TEXT as start, toolkit_experimental.duration_in(states, 'STOPPED')::TEXT as stopped FROM (SELECT toolkit_experimental.compact_state_agg(ts, state) as states FROM test) as foo"#, None, &[], ) .unwrap().first() 
.get_three::<&str, &str, &str>().unwrap(), (Some("00:01:00"), Some("00:01:00"), Some("00:00:00")) ); assert_eq!( client .update( r#"SELECT duration_in(states, 'ERROR')::TEXT as error, duration_in(states, 'START')::TEXT as start, duration_in(states, 'STOPPED')::TEXT as stopped FROM (SELECT state_agg(ts, state) as states FROM test) as foo"#, None, &[], ) .unwrap() .first() .get_three::<&str, &str, &str>() .unwrap(), (Some("00:01:00"), Some("00:01:00"), Some("00:00:00")) ); }) } #[pg_test] fn interpolated_duration() { Spi::connect_mut(|client| { client .update( "SET TIME ZONE 'UTC'; CREATE TABLE inttest(time TIMESTAMPTZ, state TEXT, bucket INT); CREATE TABLE inttest2(time TIMESTAMPTZ, state BIGINT, bucket INT);", None, &[], ) .unwrap(); client .update( r#"INSERT INTO inttest VALUES ('2020-1-1 10:00'::timestamptz, 'one', 1), ('2020-1-1 12:00'::timestamptz, 'two', 1), ('2020-1-1 16:00'::timestamptz, 'three', 1), ('2020-1-2 2:00'::timestamptz, 'one', 2), ('2020-1-2 12:00'::timestamptz, 'two', 2), ('2020-1-2 20:00'::timestamptz, 'three', 2), ('2020-1-3 10:00'::timestamptz, 'one', 3), ('2020-1-3 12:00'::timestamptz, 'two', 3), ('2020-1-3 16:00'::timestamptz, 'three', 3); INSERT INTO inttest2 VALUES ('2020-1-1 10:00'::timestamptz, 10001, 1), ('2020-1-1 12:00'::timestamptz, 10002, 1), ('2020-1-1 16:00'::timestamptz, 10003, 1), ('2020-1-2 2:00'::timestamptz, 10001, 2), ('2020-1-2 12:00'::timestamptz, 10002, 2), ('2020-1-2 20:00'::timestamptz, 10003, 2), ('2020-1-3 10:00'::timestamptz, 10001, 3), ('2020-1-3 12:00'::timestamptz, 10002, 3), ('2020-1-3 16:00'::timestamptz, 10003, 3);"#, None, &[], ) .unwrap(); // Interpolate time spent in state "three" each day let mut durations = client.update( r#"SELECT toolkit_experimental.interpolated_duration_in( agg, 'three', '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) )::TEXT FROM ( SELECT bucket, toolkit_experimental.compact_state_agg(time, state) as agg FROM inttest 
GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ).unwrap(); // Day 1, in "three" from "16:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("08:00:00") ); // Day 2, in "three" from start of day to "2:00" and "20:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("06:00:00") ); // Day 3, in "three" from start of day to end assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("18:00:00") ); assert!(durations.next().is_none()); let mut durations = client.update( r#"SELECT interpolated_duration_in( agg, 'three', '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) )::TEXT FROM ( SELECT bucket, state_agg(time, state) as agg FROM inttest GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ).unwrap(); // Day 1, in "three" from "16:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("08:00:00") ); // Day 2, in "three" from start of day to "2:00" and "20:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("06:00:00") ); // Day 3, in "three" from start of day to end assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("18:00:00") ); assert!(durations.next().is_none()); let mut durations = client.update( r#"SELECT interpolated_duration_in( agg, 10003, '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) )::TEXT FROM ( SELECT bucket, state_agg(time, state) as agg FROM inttest2 GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ).unwrap(); // Day 1, in "three" from "16:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("08:00:00") ); // Day 2, in "three" from start of day to "2:00" and "20:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("06:00:00") ); // Day 3, in "three" from start of day to end assert_eq!( 
durations.next().unwrap()[1].value().unwrap(), Some("18:00:00") ); assert!(durations.next().is_none()); let mut durations = client.update( r#"SELECT toolkit_experimental.interpolated_duration_in( agg, 10003, '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) )::TEXT FROM ( SELECT bucket, toolkit_experimental.compact_state_agg(time, state) as agg FROM inttest2 GROUP BY bucket ORDER BY bucket ) s ORDER BY bucket"#, None, &[], ).unwrap(); // Day 1, in "three" from "16:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("08:00:00") ); // Day 2, in "three" from start of day to "2:00" and "20:00" to end of day assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("06:00:00") ); // Day 3, in "three" from start of day to end assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("18:00:00") ); assert!(durations.next().is_none()); }); } #[pg_test( error = "state cannot be both String(\"ERROR\") and String(\"START\") at 631152000000000" )] fn two_states_at_one_time() { Spi::connect_mut(|client| { client .update("CREATE TABLE test(ts timestamptz, state TEXT)", None, &[]) .unwrap(); client .update( r#"INSERT INTO test VALUES ('2020-01-01 00:00:00+00', 'START'), ('2020-01-01 00:00:00+00', 'ERROR')"#, None, &[], ) .unwrap(); client .update( "SELECT toolkit_experimental.duration_in(toolkit_experimental.compact_state_agg(ts, state), 'one') FROM test", None, &[] ) .unwrap(); client .update( "SELECT duration_in(state_agg(ts, state), 'one') FROM test", None, &[], ) .unwrap(); }) } #[pg_test] fn interpolate_introduces_state() { Spi::connect_mut(|client| { client .update( "CREATE TABLE states(time TIMESTAMPTZ, state TEXT, bucket INT)", None, &[], ) .unwrap(); client .update( r#"INSERT INTO states VALUES ('2020-1-1 10:00', 'starting', 1), ('2020-1-1 10:30', 'running', 1), ('2020-1-2 16:00', 'error', 2), ('2020-1-3 18:30', 'starting', 3), ('2020-1-3 19:30', 'running', 3), 
('2020-1-4 12:00', 'stopping', 4)"#, None, &[], ) .unwrap(); let mut durations = client .update( r#"SELECT toolkit_experimental.interpolated_duration_in( agg, 'running', '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) )::TEXT FROM ( SELECT bucket, toolkit_experimental.compact_state_agg(time, state) as agg FROM states GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("13:30:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("16:00:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("04:30:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("12:00:00") ); let mut durations = client .update( r#"SELECT interpolated_duration_in( agg, 'running', '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) )::TEXT FROM ( SELECT bucket, state_agg(time, state) as agg FROM states GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("13:30:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("16:00:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("04:30:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("12:00:00") ); let mut durations = client .update( r#"SELECT (agg -> interpolated_duration_in( 'running', '2019-12-31 0:00'::timestamptz + (bucket * '1 day'::interval), '1 day'::interval, LAG(agg) OVER (ORDER BY bucket) ))::TEXT FROM ( SELECT bucket, state_agg(time, state) as agg FROM states GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("13:30:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("16:00:00") ); assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("04:30:00") ); 
assert_eq!( durations.next().unwrap()[1].value().unwrap(), Some("12:00:00") ); }) } #[pg_test] fn text_serialization() { Spi::connect_mut(|client| { client .update( "SET TIME ZONE 'UTC'; CREATE TABLE states(ts TIMESTAMPTZ, state TEXT); CREATE TABLE states_int(ts TIMESTAMPTZ, state BIGINT);", None, &[], ) .unwrap(); // only a single entry so ordering is consistent between runs client .update( r#"INSERT INTO states VALUES ('2020-1-1 10:00', 'starting'); INSERT INTO states_int VALUES ('2020-1-1 10:00', -67876545);"#, None, &[], ) .unwrap(); let agg_text = select_one!( client, "SELECT state_agg(ts, state)::TEXT FROM states", &str ); let expected = "(version:1,compact_state_agg:(version:1,states_len:8,durations_len:1,durations:[(duration:0,state:(a:0,b:8))],combined_durations_len:1,combined_durations:[(start_time:631188000000000,end_time:631188000000000,state:(a:0,b:8))],first_time:631188000000000,last_time:631188000000000,first_state:0,last_state:0,states:[115,116,97,114,116,105,110,103],compact:false,integer_states:false))"; assert_eq!(agg_text, expected); let agg_text = select_one!( client, "SELECT state_agg(ts, state)::TEXT FROM states_int", &str ); let expected = "(version:1,compact_state_agg:(version:1,states_len:0,durations_len:1,durations:[(duration:0,state:(a:9223372036854775807,b:-67876545))],combined_durations_len:1,combined_durations:[(start_time:631188000000000,end_time:631188000000000,state:(a:9223372036854775807,b:-67876545))],first_time:631188000000000,last_time:631188000000000,first_state:0,last_state:0,states:[],compact:false,integer_states:true))"; assert_eq!(agg_text, expected); }); } #[pg_test] fn combine() { assert_eq!(state_agg::combine(None, None), None); let mut trans_state_2 = CompactStateAggTransState::new(true); trans_state_2.record(MaterializedState::Integer(444), 10005000); let mut trans_state_1 = CompactStateAggTransState::new(true); trans_state_1.record(MaterializedState::Integer(333), 10000000); let trans_state = 
state_agg::combine(Some(&trans_state_1), Some(&trans_state_2)).unwrap(); let trans_state = state_agg::combine(Some(&trans_state), None).unwrap(); let trans_state = state_agg::combine(None, Some(&trans_state)).unwrap(); assert_eq!( trans_state, CompactStateAggTransState { records: vec![ Record { state: MaterializedState::Integer(333), time: 10000000 }, Record { state: MaterializedState::Integer(444), time: 10005000 } ], integer_states: true, } ); } #[pg_test] fn binary_serialization_integer() { let mut trans_state = CompactStateAggTransState::new(true); // only inserting one state since to avoid random ordering trans_state.record(MaterializedState::Integer(22), 99); let agg = state_agg::finally(Some(&mut trans_state)).unwrap(); // dis: duration i64, state entry (i64, i64) let expected = [ 232, 1, 0, 0, // header 1, // version 0, 0, 0, // padding // inner compact_state_agg: 200, 1, 0, 0, // header 1, // version 0, 0, 0, // padding 0, 0, 0, 0, 0, 0, 0, 0, // states_len (empty since integer states) 1, 0, 0, 0, 0, 0, 0, 0, // durations_len 0, 0, 0, 0, 0, 0, 0, 0, // state 1: duration 255, 255, 255, 255, 255, 255, 255, 127, // state 1: a 22, 0, 0, 0, 0, 0, 0, 0, // state 1: b 1, 0, 0, 0, 0, 0, 0, 0, // combined_durations_len 99, 0, 0, 0, 0, 0, 0, 0, // state 1: start time 99, 0, 0, 0, 0, 0, 0, 0, // state 1: end time 255, 255, 255, 255, 255, 255, 255, 127, // state 1: a 22, 0, 0, 0, 0, 0, 0, 0, // state 1: b 99, 0, 0, 0, 0, 0, 0, 0, // first_time 99, 0, 0, 0, 0, 0, 0, 0, // last_time 0, 0, 0, 0, // first_state (index) 0, 0, 0, 0, // last_state (index) // states array is empty 0, // compact (false) 1, // integer states (true) ]; assert_eq!(agg.to_pg_bytes(), expected); } #[pg_test] fn binary_serialization_string() { let mut trans_state = CompactStateAggTransState::new(false); // only inserting one state since to avoid random ordering trans_state.record(MaterializedState::String("ABC".to_string()), 99); let agg = state_agg::finally(Some(&mut trans_state)).unwrap(); // dis: 
duration i64, state entry (i64, i64) let expected = [ 244, 1, 0, 0, // header 1, // version 0, 0, 0, // padding // inner compact_state_agg: 212, 1, 0, 0, // header 1, // version 0, 0, 0, // padding 3, 0, 0, 0, 0, 0, 0, 0, // states_len 1, 0, 0, 0, 0, 0, 0, 0, // durations_len 0, 0, 0, 0, 0, 0, 0, 0, // state 1: duration 0, 0, 0, 0, 0, 0, 0, 0, // state 1: a 3, 0, 0, 0, 0, 0, 0, 0, // state 1: b 1, 0, 0, 0, 0, 0, 0, 0, // combined_durations_len 99, 0, 0, 0, 0, 0, 0, 0, // state 1: start time 99, 0, 0, 0, 0, 0, 0, 0, // state 1: end time 0, 0, 0, 0, 0, 0, 0, 0, // state 1: a 3, 0, 0, 0, 0, 0, 0, 0, // state 1: b 99, 0, 0, 0, 0, 0, 0, 0, // first_time 99, 0, 0, 0, 0, 0, 0, 0, // last_time 0, 0, 0, 0, // first_state (index) 0, 0, 0, 0, // last_state (index) 65, 66, 67, // states array 0, // compact (false) 0, // integer states (false) ]; assert_eq!(agg.to_pg_bytes(), expected); } } ================================================ FILE: extension/src/stats_agg.rs ================================================ use pgrx::*; use crate::{ accessors::{ AccessorAverage, AccessorAverageX, AccessorAverageY, AccessorCorr, AccessorCovar, AccessorDeterminationCoeff, AccessorIntercept, AccessorKurtosis, AccessorKurtosisX, AccessorKurtosisY, AccessorNumVals, AccessorSkewness, AccessorSkewnessX, AccessorSkewnessY, AccessorSlope, AccessorStdDev, AccessorStdDevX, AccessorStdDevY, AccessorSum, AccessorSumX, AccessorSumY, AccessorVariance, AccessorVarianceX, AccessorVarianceY, AccessorXIntercept, }, aggregate_utils::in_aggregate_context, build, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, ron_inout_funcs, }; pub use stats_agg::stats1d::StatsSummary1D as InternalStatsSummary1D; pub use stats_agg::stats2d::StatsSummary2D as InternalStatsSummary2D; use stats_agg::XYPair; use crate::stats_agg::Method::*; use stats_agg::TwoFloat; use crate::raw::bytea; type StatsSummary1DTF = InternalStatsSummary1D; type StatsSummary2DTF = InternalStatsSummary2D; pg_type! 
{ #[derive(Debug, PartialEq)] struct StatsSummary1D { n: u64, sx: f64, sx2: f64, sx3: f64, sx4: f64, } } pg_type! { #[derive(Debug, PartialEq)] struct StatsSummary2D { n: u64, sx: f64, sx2: f64, sx3: f64, sx4: f64, sy: f64, sy2: f64, sy3: f64, sy4: f64, sxy: f64, } } ron_inout_funcs!(StatsSummary1D); ron_inout_funcs!(StatsSummary2D); impl StatsSummary1D { fn to_internal(&self) -> InternalStatsSummary1D { InternalStatsSummary1D { n: self.n, sx: self.sx, sx2: self.sx2, sx3: self.sx3, sx4: self.sx4, } } pub fn from_internal(st: InternalStatsSummary1D) -> Self { build!(StatsSummary1D { n: st.n, sx: st.sx, sx2: st.sx2, sx3: st.sx3, sx4: st.sx4, }) } } impl StatsSummary2D { fn to_internal(&self) -> InternalStatsSummary2D { InternalStatsSummary2D { n: self.n, sx: self.sx, sx2: self.sx2, sx3: self.sx3, sx4: self.sx4, sy: self.sy, sy2: self.sy2, sy3: self.sy3, sy4: self.sy4, sxy: self.sxy, } } fn from_internal(st: InternalStatsSummary2D) -> Self { build!(StatsSummary2D { n: st.n, sx: st.sx, sx2: st.sx2, sx3: st.sx3, sx4: st.sx4, sy: st.sy, sy2: st.sy2, sy3: st.sy3, sy4: st.sy4, sxy: st.sxy, }) } } #[pg_extern(immutable, parallel_safe, strict)] pub fn stats1d_trans_serialize(state: Internal) -> bytea { let ser: &StatsSummary1D = unsafe { state.get().unwrap() }; let ser: &StatsSummary1DData = &ser.0; crate::do_serialize!(ser) } #[pg_extern(immutable, parallel_safe, strict)] pub fn stats1d_trans_deserialize(bytes: bytea, _internal: Internal) -> Option { stats1d_trans_deserialize_inner(bytes).internal() } pub fn stats1d_trans_deserialize_inner(bytes: bytea) -> Inner { let de: StatsSummary1D = crate::do_deserialize!(bytes, StatsSummary1DData); de.into() } #[pg_extern(immutable, parallel_safe, strict)] pub fn stats2d_trans_serialize(state: Internal) -> bytea { let ser: &StatsSummary2D = unsafe { state.get().unwrap() }; let ser: &StatsSummary2DData = &ser.0; crate::do_serialize!(ser) } #[pg_extern(immutable, parallel_safe, strict)] pub fn stats2d_trans_deserialize(bytes: bytea, 
_internal: Internal) -> Option { stats2d_trans_deserialize_inner(bytes).internal() } pub fn stats2d_trans_deserialize_inner(bytes: bytea) -> Inner { let de: StatsSummary2D = crate::do_deserialize!(bytes, StatsSummary2DData); de.into() } #[pg_extern(immutable, parallel_safe)] pub fn stats1d_trans( state: Internal, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats1d_trans_inner(unsafe { state.to_inner() }, val, fcinfo).internal() } #[pg_extern(immutable, parallel_safe)] pub fn stats1d_tf_trans( state: Internal, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats1d_tf_trans_inner(unsafe { state.to_inner() }, val, fcinfo).internal() } pub fn stats1d_trans_inner( state: Option>, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { match (state, val) { (None, None) => { Some(StatsSummary1D::from_internal(InternalStatsSummary1D::new()).into()) } // return an empty one from the trans function because otherwise it breaks in the window context (Some(state), None) => Some(state), (None, Some(val)) => { let mut s = InternalStatsSummary1D::new(); s.accum(val).unwrap(); Some(StatsSummary1D::from_internal(s).into()) } (Some(mut state), Some(val)) => { let mut s: InternalStatsSummary1D = state.to_internal(); s.accum(val).unwrap(); *state = StatsSummary1D::from_internal(s); Some(state) } } }) } } pub fn stats1d_tf_trans_inner( state: Option>, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { match (state, val) { (None, None) => Some(InternalStatsSummary1D::new().into()), // return an empty one from the trans function because otherwise it breaks in the window context (Some(state), None) => Some(state), (None, Some(val)) => { let val = TwoFloat::from(val); let mut s = InternalStatsSummary1D::new(); s.accum(val).unwrap(); Some(s.into()) } (Some(mut state), Some(val)) => { let val = TwoFloat::from(val); state.accum(val).unwrap(); Some(state) } } }) 
} } // Note that in general, for all stats2d cases, if either the y or x value is missing, we disregard the entire point as the n is shared between them // if the user wants us to treat nulls as a particular value (ie zero), they can use COALESCE to do so #[pg_extern(immutable, parallel_safe)] pub fn stats2d_trans( state: Internal, y: Option, x: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats2d_trans_inner(unsafe { state.to_inner() }, y, x, fcinfo).internal() } pub fn stats2d_trans_inner( state: Option>, y: Option, x: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let val: Option> = match (y, x) { (None, _) => None, (_, None) => None, (Some(y), Some(x)) => Some(XYPair { y, x }), }; match (state, val) { (None, None) => { // return an empty one from the trans function because otherwise it breaks in the window context Some(StatsSummary2D::from_internal(InternalStatsSummary2D::new()).into()) } (Some(state), None) => Some(state), (None, Some(val)) => { let mut s = InternalStatsSummary2D::new(); s.accum(val).unwrap(); Some(StatsSummary2D::from_internal(s).into()) } (Some(mut state), Some(val)) => { let mut s: InternalStatsSummary2D = state.to_internal(); s.accum(val).unwrap(); *state = StatsSummary2D::from_internal(s); Some(state) } } }) } } #[pg_extern(immutable, parallel_safe)] pub fn stats2d_tf_trans( state: Internal, y: Option, x: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats2d_tf_trans_inner(unsafe { state.to_inner() }, y, x, fcinfo).internal() } pub fn stats2d_tf_trans_inner( state: Option>, y: Option, x: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let val: Option> = match (y, x) { (None, _) => None, (_, None) => None, (Some(y), Some(x)) => Some(XYPair { y: y.into(), x: x.into(), }), }; match (state, val) { (None, None) => { // return an empty one from the trans function because otherwise it breaks in the window context 
Some(StatsSummary2DTF::new().into()) } (Some(state), None) => Some(state), (None, Some(val)) => { let mut s = InternalStatsSummary2D::new(); s.accum(val).unwrap(); Some(s.into()) } (Some(mut state), Some(val)) => { let mut s: StatsSummary2DTF = *state; s.accum(val).unwrap(); *state = s; Some(state) } } }) } } #[pg_extern(immutable)] pub fn stats1d_inv_trans( state: Internal, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats1d_inv_trans_inner(unsafe { state.to_inner() }, val, fcinfo).internal() } pub fn stats1d_inv_trans_inner( state: Option>, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state, val) { (None, _) => panic!("Inverse function should never be called with NULL state"), (Some(state), None) => Some(state), (Some(state), Some(val)) => { let s: InternalStatsSummary1D = state.to_internal(); let s = s.remove(val); s.map(|s| StatsSummary1D::from_internal(s).into()) } }) } } #[pg_extern(immutable)] pub fn stats1d_tf_inv_trans( state: Internal, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats1d_tf_inv_trans_inner(unsafe { state.to_inner() }, val, fcinfo).internal() } pub fn stats1d_tf_inv_trans_inner( state: Option>, val: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state, val) { (None, _) => panic!("Inverse function should never be called with NULL state"), (Some(state), None) => Some(state), (Some(state), Some(val)) => { let val = TwoFloat::new_add(val, 0.0); let state = state.remove(val); state.map(|s| s.into()) } }) } } #[pg_extern(immutable)] pub fn stats2d_inv_trans( state: Internal, y: Option, x: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { stats2d_inv_trans_inner(unsafe { state.to_inner() }, y, x, fcinfo).internal() } pub fn stats2d_inv_trans_inner( state: Option>, y: Option, x: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let val: 
Option<XYPair<f64>> = match (y, x) {
                (None, _) => None,
                (_, None) => None,
                (Some(y), Some(x)) => Some(XYPair { y, x }),
            };
            match (state, val) {
                (None, _) => panic!("Inverse function should never be called with NULL state"),
                (Some(state), None) => Some(state),
                (Some(state), Some(val)) => {
                    let s: InternalStatsSummary2D<f64> = state.to_internal();
                    let s = s.remove(val);
                    s.map(|s| StatsSummary2D::from_internal(s).into())
                }
            }
        })
    }
}

/// Inverse transition for the TwoFloat 2D moving-aggregate state.
#[pg_extern(immutable)]
pub fn stats2d_tf_inv_trans(
    state: Internal,
    y: Option<f64>,
    x: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    stats2d_tf_inv_trans_inner(unsafe { state.to_inner() }, y, x, fcinfo).internal()
}

pub fn stats2d_tf_inv_trans_inner(
    state: Option<Inner<StatsSummary2DTF>>,
    y: Option<f64>,
    x: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary2DTF>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let val: Option<XYPair<TwoFloat>> = match (y, x) {
                (None, _) => None,
                (_, None) => None,
                (Some(y), Some(x)) => Some(XYPair {
                    y: y.into(),
                    x: x.into(),
                }),
            };
            match (state, val) {
                (None, _) => panic!("Inverse function should never be called with NULL state"),
                (Some(state), None) => Some(state),
                (Some(state), Some(val)) => {
                    let s: InternalStatsSummary2D<TwoFloat> = *state;
                    let s = s.remove(val);
                    s.map(|s| s.into())
                }
            }
        })
    }
}

/// Rollup transition: combines an already-built `StatsSummary1D` into the state.
#[pg_extern(immutable, parallel_safe)]
pub fn stats1d_summary_trans(
    state: Internal,
    value: Option<StatsSummary1D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    stats1d_summary_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

pub fn stats1d_summary_trans_inner(
    state: Option<Inner<StatsSummary1D>>,
    value: Option<StatsSummary1D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary1D>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, value) {
            (state, None) => state,
            (None, Some(value)) => Some(value.in_current_context().into()),
            (Some(state), Some(value)) => {
                let s = state.to_internal();
                let v = value.to_internal();
                let s = s.combine(v).unwrap();
                let s = StatsSummary1D::from_internal(s);
                Some(s.into())
            }
        })
    }
}

/// Rollup transition: combines an already-built `StatsSummary2D` into the state.
#[pg_extern(immutable, parallel_safe)]
pub fn stats2d_summary_trans(
    state: Internal,
    value: Option<StatsSummary2D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    stats2d_summary_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

pub fn stats2d_summary_trans_inner(
    state: Option<Inner<StatsSummary2D>>,
    value: Option<StatsSummary2D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary2D>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, value) {
            (state, None) => state,
            (None, Some(value)) => Some(value.in_current_context().into()),
            (Some(state), Some(value)) => {
                let s = state.to_internal();
                let v = value.to_internal();
                let s = s.combine(v).unwrap();
                let s = StatsSummary2D::from_internal(s);
                Some(s.into())
            }
        })
    }
}

/// Inverse rollup transition for 1D summaries (window-function support).
#[pg_extern(immutable, parallel_safe)]
pub fn stats1d_summary_inv_trans(
    state: Internal,
    value: Option<StatsSummary1D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    stats1d_summary_inv_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

pub fn stats1d_summary_inv_trans_inner(
    state: Option<Inner<StatsSummary1D>>,
    value: Option<StatsSummary1D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary1D>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, &value) {
            (None, _) => panic!("Inverse function should never be called with NULL state"),
            (Some(state), None) => Some(state),
            (Some(state), Some(value)) => {
                let s = state.to_internal();
                let v = value.to_internal();
                let s = s.remove_combined(v);
                s.map(|s| StatsSummary1D::from_internal(s).into())
            }
        })
    }
}

/// Inverse rollup transition for 2D summaries (window-function support).
#[pg_extern(immutable, parallel_safe)]
pub fn stats2d_summary_inv_trans(
    state: Internal,
    value: Option<StatsSummary2D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    stats2d_summary_inv_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

pub fn stats2d_summary_inv_trans_inner(
    state: Option<Inner<StatsSummary2D>>,
    value: Option<StatsSummary2D>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary2D>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, &value) {
            (None, _) => panic!("Inverse function should never be called with NULL state"),
            (Some(state), None) => Some(state),
            (Some(state), Some(value)) => {
                let s = state.to_internal();
                let v = value.to_internal();
                let s = s.remove_combined(v);
                s.map(|s|
StatsSummary2D::from_internal(s).into())
            }
        })
    }
}

/// Combine function for the 1D aggregate: merges two partial states (parallel workers).
#[pg_extern(immutable, parallel_safe)]
pub fn stats1d_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { stats1d_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

pub fn stats1d_combine_inner(
    state1: Option<Inner<StatsSummary1D>>,
    state2: Option<Inner<StatsSummary1D>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary1D>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state1, state2) {
            (None, None) => None,
            (None, Some(state2)) => {
                // Copy the surviving state into the aggregate's memory context.
                let s = state2.in_current_context();
                Some(s.into())
            }
            (Some(state1), None) => {
                let s = state1.in_current_context();
                Some(s.into())
            }
            (Some(state1), Some(state2)) => {
                let s1 = state1.to_internal();
                let s2 = state2.to_internal();
                let s1 = s1.combine(s2).unwrap();
                Some(StatsSummary1D::from_internal(s1).into())
            }
        })
    }
}

/// Combine function for the 2D aggregate: merges two partial states (parallel workers).
#[pg_extern(immutable, parallel_safe)]
pub fn stats2d_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { stats2d_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

pub fn stats2d_combine_inner(
    state1: Option<Inner<StatsSummary2D>>,
    state2: Option<Inner<StatsSummary2D>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<StatsSummary2D>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state1, state2) {
            (None, None) => None,
            (None, Some(state2)) => {
                let s = state2.in_current_context();
                Some(s.into())
            }
            (Some(state1), None) => {
                let s = state1.in_current_context();
                Some(s.into())
            }
            (Some(state1), Some(state2)) => {
                let s1 = state1.to_internal();
                let s2 = state2.to_internal();
                let s1 = s1.combine(s2).unwrap();
                Some(StatsSummary2D::from_internal(s1).into())
            }
        })
    }
}

/// Final function: hands the accumulated 1D summary back to SQL.
#[pg_extern(immutable, parallel_safe)]
fn stats1d_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<StatsSummary1D<'static>> {
    unsafe {
        in_aggregate_context(fcinfo, || match state.get() {
            None => None,
            Some(state) => {
                let state: &StatsSummary1D = state;
                Some(state.in_current_context())
            }
        })
    }
}

/// Final function for the TwoFloat 1D moving-aggregate state.
#[pg_extern(immutable, parallel_safe)]
fn stats1d_tf_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
    // return a normal stats summary here
) -> Option<StatsSummary1D<'static>> {
    unsafe {
        in_aggregate_context(fcinfo, || match state.get() {
            None => None,
            Some(state) => {
                // Narrow the TwoFloat accumulator back down to the plain f64 summary.
                let state: &StatsSummary1DTF = state;
                let state: InternalStatsSummary1D<TwoFloat> = *state;
                let state: InternalStatsSummary1D<f64> = state.into();
                let state: StatsSummary1D = StatsSummary1D::from_internal(state);
                Some(state.in_current_context())
            }
        })
    }
}

/// Final function: hands the accumulated 2D summary back to SQL.
#[pg_extern(immutable, parallel_safe)]
fn stats2d_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<StatsSummary2D<'static>> {
    unsafe {
        in_aggregate_context(fcinfo, || match state.get() {
            None => None,
            Some(state) => {
                let state: &StatsSummary2D = state;
                Some(state.in_current_context())
            }
        })
    }
}

/// Final function for the TwoFloat 2D moving-aggregate state.
#[pg_extern(immutable, parallel_safe)]
fn stats2d_tf_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<StatsSummary2D<'static>> {
    unsafe {
        in_aggregate_context(fcinfo, || match state.get() {
            None => None,
            Some(state) => {
                let state: StatsSummary2DTF = *state;
                let state: InternalStatsSummary2D<f64> = state.into();
                let state: StatsSummary2D = StatsSummary2D::from_internal(state);
                Some(state.in_current_context())
            }
        })
    }
}

// no serial/unserial/combine function for TwoFloats since moving aggregate mode and partial aggregate mode are mutually exclusive
extension_sql!(
    "\n\
CREATE AGGREGATE stats_agg( value DOUBLE PRECISION )\n\
(\n\
    sfunc = stats1d_trans,\n\
    stype = internal,\n\
    finalfunc = stats1d_final,\n\
    combinefunc = stats1d_combine,\n\
    serialfunc = stats1d_trans_serialize,\n\
    deserialfunc = stats1d_trans_deserialize,\n\
    msfunc = stats1d_tf_trans,\n\
    minvfunc = stats1d_tf_inv_trans,\n\
    mstype = internal,\n\
    mfinalfunc = stats1d_tf_final,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_agg_1d",
    requires = [
        stats1d_trans,
        stats1d_final,
        stats1d_combine,
        stats1d_trans_serialize,
        stats1d_trans_deserialize,
        stats1d_trans,
        stats1d_inv_trans,
        stats1d_final
    ],
);

extension_sql!(
    "CREATE AGGREGATE toolkit_experimental.stats_agg_tf( value DOUBLE PRECISION )\n\
(\n\
    sfunc = stats1d_tf_trans,\n\
    stype
= internal,\n\
    finalfunc = stats1d_tf_final,\n\
    msfunc = stats1d_tf_trans,\n\
    minvfunc = stats1d_tf_inv_trans,\n\
    mstype = internal,\n\
    mfinalfunc = stats1d_tf_final,\n\
    parallel = safe\n\
);",
    name = "stats_agg_tf_1d",
    requires = [
        stats1d_tf_trans,
        stats1d_tf_final,
        stats1d_tf_trans,
        stats1d_tf_inv_trans,
        stats1d_tf_final
    ],
);

// mostly for testing/debugging, in case we want one without the inverse functions defined.
extension_sql!(
    "\n\
CREATE AGGREGATE stats_agg_no_inv( value DOUBLE PRECISION )\n\
(\n\
    sfunc = stats1d_trans,\n\
    stype = internal,\n\
    finalfunc = stats1d_final,\n\
    combinefunc = stats1d_combine,\n\
    serialfunc = stats1d_trans_serialize,\n\
    deserialfunc = stats1d_trans_deserialize,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_agg_no_inv",
    requires = [
        stats1d_trans,
        stats1d_final,
        stats1d_combine,
        stats1d_trans_serialize,
        stats1d_trans_deserialize
    ],
);

// same things for the 2d case
extension_sql!(
    "\n\
CREATE AGGREGATE stats_agg( y DOUBLE PRECISION, x DOUBLE PRECISION )\n\
(\n\
    sfunc = stats2d_trans,\n\
    stype = internal,\n\
    finalfunc = stats2d_final,\n\
    combinefunc = stats2d_combine,\n\
    serialfunc = stats2d_trans_serialize,\n\
    deserialfunc = stats2d_trans_deserialize,\n\
    msfunc = stats2d_tf_trans,\n\
    minvfunc = stats2d_tf_inv_trans,\n\
    mstype = internal,\n\
    mfinalfunc = stats2d_tf_final,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_agg_2d",
    requires = [
        stats2d_trans,
        stats2d_final,
        stats2d_combine,
        stats2d_trans_serialize,
        stats2d_trans_deserialize,
        stats2d_tf_trans,
        stats2d_tf_inv_trans,
        stats2d_tf_final,
    ],
);

extension_sql!(
    "\n\
CREATE AGGREGATE toolkit_experimental.stats_agg_tf( y DOUBLE PRECISION, x DOUBLE PRECISION )\n\
(\n\
    sfunc = stats2d_tf_trans,\n\
    stype = internal,\n\
    finalfunc = stats2d_tf_final,\n\
    msfunc = stats2d_tf_trans,\n\
    minvfunc = stats2d_tf_inv_trans,\n\
    mstype = internal,\n\
    mfinalfunc = stats2d_tf_final,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_agg_2d_tf",
    requires = [stats2d_tf_trans, stats2d_tf_inv_trans, stats2d_tf_final],
);

// mostly for testing/debugging, in case we want one without the inverse functions defined.
extension_sql!(
    "\n\
CREATE AGGREGATE stats_agg_no_inv( y DOUBLE PRECISION, x DOUBLE PRECISION )\n\
(\n\
    sfunc = stats2d_trans,\n\
    stype = internal,\n\
    finalfunc = stats2d_final,\n\
    combinefunc = stats2d_combine,\n\
    serialfunc = stats2d_trans_serialize,\n\
    deserialfunc = stats2d_trans_deserialize,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_agg2_no_inv",
    requires = [
        stats2d_trans,
        stats2d_final,
        stats2d_combine,
        stats2d_trans_serialize,
        stats2d_trans_deserialize
    ],
);

// Currently, rollup does not have the inverse function so if you want the behavior where we don't use the inverse,
// you can use it in your window functions (useful for our own perf testing as well)
extension_sql!(
    "\n\
CREATE AGGREGATE rollup(ss statssummary1d)\n\
(\n\
    sfunc = stats1d_summary_trans,\n\
    stype = internal,\n\
    finalfunc = stats1d_final,\n\
    combinefunc = stats1d_combine,\n\
    serialfunc = stats1d_trans_serialize,\n\
    deserialfunc = stats1d_trans_deserialize,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_1d_rollup",
    requires = [
        stats1d_summary_trans,
        stats1d_final,
        stats1d_combine,
        stats1d_trans_serialize,
        stats1d_trans_deserialize
    ],
);

// For UI, we decided to have slightly differently named functions for the windowed context and not, so that it reads better, as well as using the inverse function only in the window context
extension_sql!(
    "\n\
CREATE AGGREGATE rolling(ss statssummary1d)\n\
(\n\
    sfunc = stats1d_summary_trans,\n\
    stype = internal,\n\
    finalfunc = stats1d_final,\n\
    combinefunc = stats1d_combine,\n\
    serialfunc = stats1d_trans_serialize,\n\
    deserialfunc = stats1d_trans_deserialize,\n\
    msfunc = stats1d_summary_trans,\n\
    minvfunc = stats1d_summary_inv_trans,\n\
    mstype = internal,\n\
    mfinalfunc = stats1d_final,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_1d_rolling",
    requires = [
        stats1d_summary_trans,
        stats1d_final,
        stats1d_combine,
        stats1d_trans_serialize,
stats1d_trans_deserialize,
        stats1d_summary_inv_trans
    ],
);

// Same as for the 1D case, but for the 2D
extension_sql!(
    "\n\
CREATE AGGREGATE rollup(ss statssummary2d)\n\
(\n\
    sfunc = stats2d_summary_trans,\n\
    stype = internal,\n\
    finalfunc = stats2d_final,\n\
    combinefunc = stats2d_combine,\n\
    serialfunc = stats2d_trans_serialize,\n\
    deserialfunc = stats2d_trans_deserialize,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_2d_rollup",
    requires = [
        stats2d_summary_trans,
        stats2d_final,
        stats2d_combine,
        stats2d_trans_serialize,
        stats2d_trans_deserialize
    ],
);

// For UI, we decided to have slightly differently named functions for the windowed context and not, so that it reads better, as well as using the inverse function only in the window context
extension_sql!(
    "\n\
CREATE AGGREGATE rolling(ss statssummary2d)\n\
(\n\
    sfunc = stats2d_summary_trans,\n\
    stype = internal,\n\
    finalfunc = stats2d_final,\n\
    combinefunc = stats2d_combine,\n\
    serialfunc = stats2d_trans_serialize,\n\
    deserialfunc = stats2d_trans_deserialize,\n\
    msfunc = stats2d_summary_trans,\n\
    minvfunc = stats2d_summary_inv_trans,\n\
    mstype = internal,\n\
    mfinalfunc = stats2d_final,\n\
    parallel = safe\n\
);\n\
",
    name = "stats_2d_rolling",
    requires = [
        stats2d_summary_trans,
        stats2d_final,
        stats2d_combine,
        stats2d_trans_serialize,
        stats2d_trans_deserialize,
        stats2d_summary_inv_trans
    ],
);

/// `summary -> average()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats1d_average(sketch: StatsSummary1D, _accessor: AccessorAverage) -> Option<f64> {
    stats1d_average(sketch)
}

/// Arithmetic mean of the aggregated values.
#[pg_extern(name = "average", strict, immutable, parallel_safe)]
pub(crate) fn stats1d_average(summary: StatsSummary1D) -> Option<f64> {
    summary.to_internal().avg()
}

/// `summary -> sum()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats1d_sum(sketch: StatsSummary1D, _accessor: AccessorSum) -> Option<f64> {
    stats1d_sum(sketch)
}

/// Sum of the aggregated values.
#[pg_extern(name = "sum", strict, immutable, parallel_safe)]
pub(crate) fn stats1d_sum(summary: StatsSummary1D) -> Option<f64> {
    summary.to_internal().sum()
}

/// `summary -> stddev(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats1d_stddev(
    sketch: Option<StatsSummary1D>,
    accessor: AccessorStdDev,
) -> Option<f64> {
    stats1d_stddev(sketch, accessor.method.as_str())
}

/// Standard deviation; `method` selects the sample or population formula.
#[pg_extern(name = "stddev", immutable, parallel_safe)]
fn stats1d_stddev(
    summary: Option<StatsSummary1D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => summary?.to_internal().stddev_pop(),
        Sample => summary?.to_internal().stddev_samp(),
    }
}

/// `summary -> variance(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats1d_variance(
    sketch: Option<StatsSummary1D>,
    accessor: AccessorVariance,
) -> Option<f64> {
    stats1d_variance(sketch, accessor.method.as_str())
}

/// Variance; `method` selects the sample or population formula.
#[pg_extern(name = "variance", immutable, parallel_safe)]
fn stats1d_variance(
    summary: Option<StatsSummary1D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => summary?.to_internal().var_pop(),
        Sample => summary?.to_internal().var_samp(),
    }
}

/// `summary -> skewness(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats1d_skewness(sketch: StatsSummary1D, accessor: AccessorSkewness) -> Option<f64> {
    stats1d_skewness(sketch, accessor.method.as_str())
}

/// Skewness (third standardized moment); sample or population formula.
#[pg_extern(name = "skewness", immutable, parallel_safe)]
fn stats1d_skewness(summary: StatsSummary1D, method: default!(&str, "'sample'")) -> Option<f64> {
    match method_kind(method) {
        Population => summary.to_internal().skewness_pop(),
        Sample => summary.to_internal().skewness_samp(),
    }
}

/// `summary -> kurtosis(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats1d_kurtosis(sketch: StatsSummary1D, accessor: AccessorKurtosis) -> Option<f64> {
    stats1d_kurtosis(sketch, accessor.method.as_str())
}

/// Kurtosis (fourth standardized moment); sample or population formula.
#[pg_extern(name = "kurtosis", immutable, parallel_safe)]
fn stats1d_kurtosis(summary: StatsSummary1D, method: default!(&str, "'sample'")) -> Option<f64> {
    match method_kind(method) {
        Population => summary.to_internal().kurtosis_pop(),
        Sample => summary.to_internal().kurtosis_samp(),
    }
}

/// `summary -> num_vals()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn
arrow_stats1d_num_vals(sketch: StatsSummary1D, _accessor: AccessorNumVals) -> i64 {
    stats1d_num_vals(sketch)
}

/// Number of values folded into the summary.
#[pg_extern(name = "num_vals", strict, immutable, parallel_safe)]
fn stats1d_num_vals(summary: StatsSummary1D) -> i64 {
    summary.to_internal().count()
}

/// `summary -> average_x()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_average_x(
    sketch: StatsSummary2D,
    _accessor: AccessorAverageX,
) -> Option<f64> {
    stats2d_average_x(sketch)
}

/// Mean of the x coordinates.
#[pg_extern(name = "average_x", strict, immutable, parallel_safe)]
fn stats2d_average_x(summary: StatsSummary2D) -> Option<f64> {
    Some(summary.to_internal().avg()?.x)
}

/// `summary -> average_y()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_average_y(
    sketch: StatsSummary2D,
    _accessor: AccessorAverageY,
) -> Option<f64> {
    stats2d_average_y(sketch)
}

/// Mean of the y coordinates.
#[pg_extern(name = "average_y", strict, immutable, parallel_safe)]
fn stats2d_average_y(summary: StatsSummary2D) -> Option<f64> {
    Some(summary.to_internal().avg()?.y)
}

/// `summary -> sum_x()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_sum_x(sketch: StatsSummary2D, _accessor: AccessorSumX) -> Option<f64> {
    stats2d_sum_x(sketch)
}

/// Sum of the x coordinates.
#[pg_extern(name = "sum_x", strict, immutable, parallel_safe)]
fn stats2d_sum_x(summary: StatsSummary2D) -> Option<f64> {
    Some(summary.to_internal().sum()?.x)
}

/// `summary -> sum_y()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_sum_y(sketch: StatsSummary2D, _accessor: AccessorSumY) -> Option<f64> {
    stats2d_sum_y(sketch)
}

/// Sum of the y coordinates.
#[pg_extern(name = "sum_y", strict, immutable, parallel_safe)]
fn stats2d_sum_y(summary: StatsSummary2D) -> Option<f64> {
    Some(summary.to_internal().sum()?.y)
}

// NOTE(review): "stdddev" (triple d) below is the historical Rust symbol name for these
// arrow accessors; renaming it would change the generated SQL function name, so it is kept.
/// `summary -> stddev_x(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_stdddev_x(
    sketch: Option<StatsSummary2D>,
    accessor: AccessorStdDevX,
) -> Option<f64> {
    stats2d_stddev_x(sketch, accessor.method.as_str())
}

/// Standard deviation of the x coordinates; sample or population formula.
#[pg_extern(name = "stddev_x", immutable, parallel_safe)]
fn stats2d_stddev_x(
    summary: Option<StatsSummary2D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary?.to_internal().stddev_pop()?.x),
        Sample => Some(summary?.to_internal().stddev_samp()?.x),
    }
}

/// `summary -> stddev_y(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_stdddev_y(
    sketch: Option<StatsSummary2D>,
    accessor: AccessorStdDevY,
) -> Option<f64> {
    stats2d_stddev_y(sketch, accessor.method.as_str())
}

/// Standard deviation of the y coordinates; sample or population formula.
#[pg_extern(name = "stddev_y", immutable, parallel_safe)]
fn stats2d_stddev_y(
    summary: Option<StatsSummary2D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary?.to_internal().stddev_pop()?.y),
        Sample => Some(summary?.to_internal().stddev_samp()?.y),
    }
}

/// `summary -> variance_x(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_variance_x(
    sketch: Option<StatsSummary2D>,
    accessor: AccessorVarianceX,
) -> Option<f64> {
    stats2d_variance_x(sketch, accessor.method.as_str())
}

/// Variance of the x coordinates; sample or population formula.
#[pg_extern(name = "variance_x", immutable, parallel_safe)]
fn stats2d_variance_x(
    summary: Option<StatsSummary2D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary?.to_internal().var_pop()?.x),
        Sample => Some(summary?.to_internal().var_samp()?.x),
    }
}

/// `summary -> variance_y(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_variance_y(
    sketch: Option<StatsSummary2D>,
    accessor: AccessorVarianceY,
) -> Option<f64> {
    stats2d_variance_y(sketch, accessor.method.as_str())
}

/// Variance of the y coordinates; sample or population formula.
#[pg_extern(name = "variance_y", immutable, parallel_safe)]
fn stats2d_variance_y(
    summary: Option<StatsSummary2D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary?.to_internal().var_pop()?.y),
        Sample => Some(summary?.to_internal().var_samp()?.y),
    }
}

/// `summary -> skewness_x(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_skewness_x(
    sketch: StatsSummary2D,
    accessor: AccessorSkewnessX,
) -> Option<f64> {
    stats2d_skewness_x(sketch, accessor.method.as_str())
}

/// Skewness of the x coordinates; sample or population formula.
#[pg_extern(name = "skewness_x", strict, immutable, parallel_safe)]
fn stats2d_skewness_x(summary: StatsSummary2D, method: default!(&str, "'sample'")) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary.to_internal().skewness_pop()?.x),
        Sample => Some(summary.to_internal().skewness_samp()?.x),
    }
}

/// `summary -> skewness_y(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_skewness_y(
    sketch: StatsSummary2D,
    accessor: AccessorSkewnessY,
) -> Option<f64> {
    stats2d_skewness_y(sketch, accessor.method.as_str())
}

/// Skewness of the y coordinates; sample or population formula.
#[pg_extern(name = "skewness_y", strict, immutable, parallel_safe)]
fn stats2d_skewness_y(summary: StatsSummary2D, method: default!(&str, "'sample'")) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary.to_internal().skewness_pop()?.y),
        Sample => Some(summary.to_internal().skewness_samp()?.y),
    }
}

/// `summary -> kurtosis_x(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_kurtosis_x(
    sketch: StatsSummary2D,
    accessor: AccessorKurtosisX,
) -> Option<f64> {
    stats2d_kurtosis_x(sketch, accessor.method.as_str())
}

/// Kurtosis of the x coordinates; sample or population formula.
#[pg_extern(name = "kurtosis_x", strict, immutable, parallel_safe)]
fn stats2d_kurtosis_x(summary: StatsSummary2D, method: default!(&str, "'sample'")) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary.to_internal().kurtosis_pop()?.x),
        Sample => Some(summary.to_internal().kurtosis_samp()?.x),
    }
}

/// `summary -> kurtosis_y(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_kurtosis_y(
    sketch: StatsSummary2D,
    accessor: AccessorKurtosisY,
) -> Option<f64> {
    stats2d_kurtosis_y(sketch, accessor.method.as_str())
}

/// Kurtosis of the y coordinates; sample or population formula.
#[pg_extern(name = "kurtosis_y", strict, immutable, parallel_safe)]
fn stats2d_kurtosis_y(summary: StatsSummary2D, method: default!(&str, "'sample'")) -> Option<f64> {
    match method_kind(method) {
        Population => Some(summary.to_internal().kurtosis_pop()?.y),
        Sample => Some(summary.to_internal().kurtosis_samp()?.y),
    }
}

/// `summary -> num_vals()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_num_vals(sketch: StatsSummary2D, _accessor: AccessorNumVals) -> i64 {
    stats2d_num_vals(sketch)
}

/// Number of (y, x) points folded into the summary.
#[pg_extern(name = "num_vals", strict, immutable, parallel_safe)]
fn stats2d_num_vals(summary: StatsSummary2D) -> i64 {
summary.to_internal().count()
}

/// `summary -> slope()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_slope(sketch: StatsSummary2D, _accessor: AccessorSlope) -> Option<f64> {
    stats2d_slope(sketch)
}

/// Slope of the least-squares regression line.
#[pg_extern(name = "slope", strict, immutable, parallel_safe)]
fn stats2d_slope(summary: StatsSummary2D) -> Option<f64> {
    summary.to_internal().slope()
}

/// `summary -> corr()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_corr(sketch: StatsSummary2D, _accessor: AccessorCorr) -> Option<f64> {
    stats2d_corr(sketch)
}

/// Correlation coefficient of x and y.
#[pg_extern(name = "corr", strict, immutable, parallel_safe)]
fn stats2d_corr(summary: StatsSummary2D) -> Option<f64> {
    summary.to_internal().corr()
}

/// `summary -> intercept()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_intercept(
    sketch: StatsSummary2D,
    _accessor: AccessorIntercept,
) -> Option<f64> {
    stats2d_intercept(sketch)
}

/// Y intercept of the least-squares regression line.
#[pg_extern(name = "intercept", strict, immutable, parallel_safe)]
fn stats2d_intercept(summary: StatsSummary2D) -> Option<f64> {
    summary.to_internal().intercept()
}

/// `summary -> x_intercept()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_x_intercept(
    sketch: StatsSummary2D,
    _accessor: AccessorXIntercept,
) -> Option<f64> {
    stats2d_x_intercept(sketch)
}

/// X intercept of the least-squares regression line.
#[pg_extern(name = "x_intercept", strict, immutable, parallel_safe)]
fn stats2d_x_intercept(summary: StatsSummary2D) -> Option<f64> {
    summary.to_internal().x_intercept()
}

/// `summary -> determination_coeff()` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_determination_coeff(
    sketch: StatsSummary2D,
    _accessor: AccessorDeterminationCoeff,
) -> Option<f64> {
    stats2d_determination_coeff(sketch)
}

/// Coefficient of determination (R squared) of the regression.
#[pg_extern(name = "determination_coeff", strict, immutable, parallel_safe)]
fn stats2d_determination_coeff(summary: StatsSummary2D) -> Option<f64> {
    summary.to_internal().determination_coeff()
}

/// `summary -> covariance(method)` arrow-operator accessor.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_stats2d_covar(
    sketch: Option<StatsSummary2D>,
    accessor: AccessorCovar,
) -> Option<f64> {
    stats2d_covar(sketch, accessor.method.as_str())
}

/// Covariance of x and y; sample or population formula.
#[pg_extern(name = "covariance", immutable, parallel_safe)]
fn stats2d_covar(
    summary: Option<StatsSummary2D>,
    method: default!(&str, "'sample'"),
) -> Option<f64> {
    match method_kind(method) {
        Population => summary?.to_internal().covar_pop(),
        Sample => summary?.to_internal().covar_samp(),
    }
}

/// Which formula a statistic should use: the full population or a sample estimate.
#[derive(
    Clone,
    Copy,
    Debug,
    serde::Serialize,
    serde::Deserialize,
    flat_serialize_macro::FlatSerializable,
)]
#[repr(u8)]
pub enum Method {
    Population = 1,
    Sample = 2,
}

impl Method {
    /// Canonical lowercase name for the method.
    pub fn as_str(&self) -> &'static str {
        match self {
            Population => "population",
            Sample => "sample",
        }
    }
}

/// Parses `method`, raising a Postgres ERROR on anything other than a
/// population/sample spelling accepted by [`as_method`].
#[track_caller]
pub fn method_kind(method: &str) -> Method {
    match as_method(method) {
        Some(method) => method,
        None => {
            pgrx::error!("unknown analysis method. Valid methods are 'population' and 'sample'")
        }
    }
}

/// Case-insensitive, whitespace-tolerant parse of a method name; accepts the
/// abbreviations "pop" and "samp".
pub fn as_method(method: &str) -> Option<Method> {
    match method.trim().to_lowercase().as_str() {
        "population" | "pop" => Some(Population),
        "sample" | "samp" => Some(Sample),
        _ => None,
    }
}

// TODO: Add testing - probably want to do some fuzz testing against the Postgres implementations of the same. Possibly translate the Postgres tests as well?
// #[cfg(any(test, feature = "pg_test"))]
// mod tests {
//     use approx::assert_relative_eq;
//     use pgrx::*;
//     use super::*;
//     macro_rules! select_one {
//         ($client:expr, $stmt:expr, $type:ty) => {
//             $client
//                 .update($stmt, None, &[])
//                 .first()
//                 .get_one::<$type>()
//                 .unwrap()
//                 .unwrap()
//         };
//     }
// //do proper numerical comparisons on the values where that matters, use exact where it should be exact.
// Remainder of the commented-out (never-enabled) test scaffolding from the TODO above.
// #[track_caller]
// fn stats1d_assert_close_enough(p1:&StatsSummary1D, p2:&StatsSummary1D) {
//     assert_eq!(p1.n, p2.n, "n");
//     assert_relative_eq!(p1.sx, p2.sx);
//     assert_relative_eq!(p1.sxx, p2.sxx);
// }
// #[track_caller]
// fn stats2d_assert_close_enough(p1:&StatsSummary2D, p2:&StatsSummary2D) {
//     assert_eq!(p1.n, p2.n, "n");
//     assert_relative_eq!(p1.sx, p2.sx);
//     assert_relative_eq!(p1.sxx, p2.sxx);
//     assert_relative_eq!(p1.sy, p2.sy);
//     assert_relative_eq!(p1.syy, p2.syy);
//     assert_relative_eq!(p1.sxy, p2.sxy);
// }
// // #[pg_test]
// // fn test_combine_aggregate(){
// //     Spi::connect_mut(|client| {
// //     });
// // }
// }

// Live pg_test suite for stats_agg, run inside a scratch Postgres by pgrx.
// NOTE(review): turbofish/generic arguments (e.g. `get_one::<String>()`,
// `Option<u64>`) appear stripped from this dump; restore from VCS.
#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use super::*;
    use approx::relative_eq;
    use pgrx_macros::pg_test;
    use rand::rngs::SmallRng;
    use rand::seq::SliceRandom;
    use rand::{self, Rng, SeedableRng};

    const RUNS: usize = 10; // Number of runs to generate
    const VALS: usize = 10000; // Number of values to use for each run
    const SEED: Option = None; // RNG seed, generated from entropy if None
    const PRINT_VALS: bool = false; // Print out test values on error, this can be spammy if VALS is high

    // Round-trips StatsSummary2D through its text (RON) representation: empty
    // aggregate is NULL, known inputs produce pinned text, parsed summaries
    // behave like freshly aggregated ones, and NaN/Inf inputs are representable.
    #[pg_test]
    fn test_stats_agg_text_io() {
        Spi::connect_mut(|client| {
            client
                .update(
                    "CREATE TABLE test_table (test_x DOUBLE PRECISION, test_y DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();

            // Aggregating zero rows yields NULL.
            let test = client
                .update(
                    "SELECT stats_agg(test_y, test_x)::TEXT FROM test_table",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            assert!(test.is_none());

            // Single row: all central moments are zero.
            client
                .update("INSERT INTO test_table VALUES (10, 10);", None, &[])
                .unwrap();
            let test = client
                .update(
                    "SELECT stats_agg(test_y, test_x)::TEXT FROM test_table",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap()
                .unwrap();
            assert_eq!(
                test,
                "(version:1,n:1,sx:10,sx2:0,sx3:0,sx4:0,sy:10,sy2:0,sy3:0,sy4:0,sxy:0)"
            );

            client
                .update("INSERT INTO test_table VALUES (20, 20);", None, &[])
                .unwrap();
            let test = client
                .update(
                    "SELECT stats_agg(test_y, test_x)::TEXT FROM test_table",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap()
                .unwrap();
            let expected = "(version:1,n:2,sx:30,sx2:50,sx3:0,sx4:1250,sy:30,sy2:50,sy3:0,sy4:1250,sxy:50)";
            assert_eq!(test, expected);

            // Test a few functions to see that the text serialized object behave the same as the constructed one
            assert_eq!(
                client
                    .update(
                        "SELECT skewness_x(stats_agg(test_y, test_x)) FROM test_table",
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::(),
                client
                    .update(
                        &format!("SELECT skewness_x('{expected}'::StatsSummary2D)"),
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::()
            );
            assert_eq!(
                client
                    .update(
                        "SELECT kurtosis_y(stats_agg(test_y, test_x)) FROM test_table",
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::(),
                client
                    .update(
                        &format!("SELECT kurtosis_y('{expected}'::StatsSummary2D)"),
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::()
            );
            assert_eq!(
                client
                    .update(
                        "SELECT covariance(stats_agg(test_y, test_x)) FROM test_table",
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::(),
                client
                    .update(
                        &format!("SELECT covariance('{expected}'::StatsSummary2D)"),
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::()
            );

            // Test text round trip
            assert_eq!(
                client
                    .update(
                        &format!("SELECT '{expected}'::StatsSummary2D::TEXT"),
                        None,
                        &[]
                    )
                    .unwrap()
                    .first()
                    .get_one::()
                    .unwrap()
                    .unwrap(),
                expected
            );

            // A NaN x-value poisons the x moments and sxy but leaves y moments intact.
            client
                .update("INSERT INTO test_table VALUES ('NaN', 30);", None, &[])
                .unwrap();
            let test = client
                .update(
                    "SELECT stats_agg(test_y, test_x)::TEXT FROM test_table",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap()
                .unwrap();
            assert_eq!(test, "(version:1,n:3,sx:NaN,sx2:NaN,sx3:NaN,sx4:NaN,sy:60,sy2:200,sy3:0,sy4:20000,sxy:NaN)");

            // An infinite y-value leaves sy = inf but makes the higher y moments NaN.
            client
                .update("INSERT INTO test_table VALUES (40, 'Inf');", None, &[])
                .unwrap();
            let test = client
                .update(
                    "SELECT stats_agg(test_y, test_x)::TEXT FROM test_table",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap()
                .unwrap();
            assert_eq!(test, "(version:1,n:4,sx:NaN,sx2:NaN,sx3:NaN,sx4:NaN,sy:inf,sy2:NaN,sy3:NaN,sy4:NaN,sxy:NaN)");
        });
    }

    // Pins the exact binary (serialfunc) format of the 1D transition state.
    // (The body begins on the next original line.)
    #[pg_test]
    fn test_stats_agg_byte_io()
    // Body of test_stats_agg_byte_io (signature on the previous original line):
    // builds a 1D state by hand, serializes it, and compares against a pinned
    // byte image so the on-disk format can't silently change; then round-trips.
    {
        unsafe {
            use std::ptr;
            let state = stats1d_trans_inner(None, Some(14.0), ptr::null_mut());
            let state = stats1d_trans_inner(state, Some(18.0), ptr::null_mut());
            let state = stats1d_trans_inner(state, Some(22.7), ptr::null_mut());
            let state = stats1d_trans_inner(state, Some(39.42), ptr::null_mut());
            let state = stats1d_trans_inner(state, Some(-43.0), ptr::null_mut());

            let control = (*state.unwrap()).clone();
            let buffer = stats1d_trans_serialize(Inner::from(control.clone()).internal().unwrap());
            let buffer = varlena_to_byte_slice(buffer.0.cast_mut_ptr());

            // Pinned serialized image (version header + n + the running sums).
            let expected = [
                1, 1, 1, 5, 0, 0, 0, 0, 0, 0, 0, 144, 194, 245, 40, 92, 143, 73, 64, 100, 180,
                142, 170, 38, 151, 174, 64, 72, 48, 180, 190, 189, 33, 254, 192, 119, 78, 30, 195,
                209, 190, 96, 65,
            ];
            assert_eq!(buffer, expected);

            // Deserializing the pinned bytes must reproduce the original state.
            let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected);
            let new_state =
                stats1d_trans_deserialize_inner(bytea(pg_sys::Datum::from(expected.as_ptr())));
            assert_eq!(*new_state, control);
        }
    }

    // Fuzz harness: RUNS iterations of fresh random data through test_aggs.
    #[pg_test]
    fn stats_agg_fuzz() {
        let mut state = TestState::new(RUNS, VALS, SEED);

        for _ in 0..state.runs {
            state.populate_values();
            test_aggs(&mut state);
            state.passed += 1;
        }
    }

    // Shared fuzzing state: RNG, generated samples, and progress for error messages.
    // NOTE(review): `Vec` fields were presumably `Vec<f64>` before the dump
    // stripped generics.
    struct TestState {
        runs: usize,
        values: usize,
        passed: usize,
        x_values: Vec,
        y_values: Vec,
        seed: u64,
        r#gen: SmallRng,
    }

    impl TestState {
        // `seed: None` draws a fresh seed from entropy; the seed is kept so a
        // failing run can be reproduced (see failed_msg).
        pub fn new(runs: usize, values: usize, seed: Option) -> TestState {
            let seed = match seed {
                Some(s) => s,
                None => SmallRng::from_entropy().gen_range(0..u64::MAX),
            };

            TestState {
                runs,
                values,
                passed: 0,
                x_values: Vec::new(),
                y_values: Vec::new(),
                seed,
                r#gen: SmallRng::seed_from_u64(seed),
            }
        }

        // Regenerates x/y samples as sign * mantissa * 2^exp, with exponents
        // clustered around a random base so each run stresses one magnitude range.
        pub fn populate_values(&mut self) {
            // Discard old values
            self.x_values = Vec::with_capacity(self.values);
            self.y_values = Vec::with_capacity(self.values);

            // We'll cluster the exponential components of the random values around a particular value
            let exp_base = self
                .r#gen
                .gen_range((f64::MIN_EXP / 10) as f64..(f64::MAX_EXP / 10) as f64);

            for _ in 0..self.values {
                let exp = self.r#gen.gen_range((exp_base - 2.)..=(exp_base + 2.));
                let mantissa = self.r#gen.gen_range((1.)..2.);
                let sign = [-1., 1.].choose(&mut self.r#gen).unwrap();
                self.x_values.push(sign * mantissa * exp.exp2());

                let exp = self.r#gen.gen_range((exp_base - 2.)..=(exp_base + 2.));
                let mantissa = self.r#gen.gen_range((1.)..2.);
                let sign = [-1., 1.].choose(&mut self.r#gen).unwrap();
                self.y_values.push(sign * mantissa * exp.exp2());
            }
        }

        // Failure banner with enough context (iteration count, sample size, seed —
        // and optionally the raw values) to replay the failing run.
        pub fn failed_msg(&self, dump_vals: bool) -> String {
            format!("Failed after {} successful iterations, run using {} values generated from seed {}{}",
                self.passed, self.x_values.len(), self.seed,
                if dump_vals {
                    format!("\nX-values:\n{:?}\n\nY-values:\n{:?}", self.x_values, self.y_values)
                } else {
                    "".to_string()
                }
            )
        }
    }

    // Runs a native-Postgres query and a stats_agg query, asserting the results
    // agree within `allowed_diff` (relative; 0.0 means bit-exact) and that the
    // function call and arrow-operator forms agree exactly. When `do_moving_agg`
    // is set, a window-function variant of each is compared as well.
    #[allow(clippy::float_cmp)]
    fn check_agg_equivalence(
        state: &TestState,
        client: &mut pgrx::spi::SpiClient,
        pg_cmd: &str,
        tk_cmd: &str,
        allowed_diff: f64,
        do_moving_agg: bool,
    ) {
        warning!("pg_cmd={} ; tk_cmd={}", pg_cmd, tk_cmd);
        let pg_row = client.update(pg_cmd, None, &[]).unwrap().first();
        let (pg_result, pg_moving_agg_result) = if do_moving_agg {
            pg_row.get_two::().unwrap()
        } else {
            (pg_row.get_one::().unwrap(), None)
        };
        let pg_result = pg_result.unwrap();

        let (tk_result, arrow_result, tk_moving_agg_result) = client
            .update(tk_cmd, None, &[])
            .unwrap()
            .first()
            .get_three::()
            .unwrap();
        let (tk_result, arrow_result) = (tk_result.unwrap(), arrow_result.unwrap());
        // Function and `->` accessor must be exactly equal — same code path.
        assert_eq!(tk_result, arrow_result, "Arrow didn't match in {tk_cmd}");

        let result = if allowed_diff == 0.0 {
            pg_result == tk_result
        } else {
            relative_eq!(pg_result, tk_result, max_relative = allowed_diff)
        };

        if !result {
            let abs_diff = f64::abs(pg_result - tk_result);
            let abs_max = f64::abs(pg_result).max(f64::abs(tk_result));
            panic!(
                "Output didn't match between postgres command: {}\n\
                 and stats_agg command: {} \n\
                 \tpostgres result: {}\n\
                 \tstatsagg result: {}\n\
                 \trelative difference: {}\n\
                 \tallowed relative difference: {}\n\
                 {}",
                pg_cmd,
                tk_cmd,
                pg_result,
                tk_result,
                abs_diff / abs_max,
                allowed_diff,
                state.failed_msg(PRINT_VALS)
            );
        }

        if do_moving_agg {
            // Window-function results get a fixed (loose) tolerance.
            approx::assert_relative_eq!(
                pg_moving_agg_result.unwrap(),
                tk_moving_agg_result.unwrap(),
                max_relative = 1e-9,
            )
        }
    }

    // --- Query builders -------------------------------------------------------
    // Each returns "SELECT <full aggregate>, <3-row moving window sample>" so a
    // single round trip checks both the plain and windowed forms.

    // Native 1D aggregate over test_x.
    fn pg1d_aggx(agg: &str) -> String {
        format!("SELECT {agg}(test_x)::float, (SELECT {agg}(test_x) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3)::float FROM test_table")
    }

    // Native 1D aggregate over test_y.
    fn pg1d_aggy(agg: &str) -> String {
        format!("SELECT {agg}(test_y), (SELECT {agg}(test_y) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3) FROM test_table")
    }

    // Native 2D aggregate over (test_y, test_x).
    fn pg2d_agg(agg: &str) -> String {
        format!("SELECT {agg}(test_y, test_x)::float, (SELECT {agg}(test_y, test_x) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3)::float FROM test_table")
    }

    // Toolkit 1D accessor: function form, arrow form, and windowed form.
    fn tk1d_agg(agg: &str) -> String {
        format!(
            "SELECT \
                {agg}(stats_agg(test_x))::float, \
                (stats_agg(test_x)->{agg}())::float, \
                {agg}((SELECT stats_agg(test_x) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3))::float \
            FROM test_table"
        )
    }

    // Toolkit 1D accessor taking a method argument ('population'/'sample').
    fn tk1d_agg_arg(agg: &str, arg: &str) -> String {
        format!(
            "SELECT \
                {agg}(stats_agg(test_x), '{arg}'), \
                stats_agg(test_x)->{agg}('{arg}'), \
                {agg}((SELECT stats_agg(test_x) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3), '{arg}') \
            FROM test_table"
        )
    }

    // Toolkit 2D accessor: function form, arrow form, and windowed form.
    fn tk2d_agg(agg: &str) -> String {
        format!(
            "SELECT \
                {agg}(stats_agg(test_y, test_x))::float, \
                (stats_agg(test_y, test_x)->{agg}())::float, \
                {agg}((SELECT stats_agg(test_y, test_x) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3))::float \
            FROM test_table"
        )
    }

    // Toolkit 2D accessor taking a method argument ('population'/'sample').
    fn tk2d_agg_arg(agg: &str, arg: &str) -> String {
        format!(
            "SELECT \
                {agg}(stats_agg(test_y, test_x), '{arg}'), \
                stats_agg(test_y, test_x)->{agg}('{arg}'), \
                {agg}((SELECT stats_agg(test_y, test_x) OVER (ORDER BY test_x ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM test_table LIMIT 1 OFFSET 3), '{arg}') \
            FROM test_table"
        )
    }

    // Postgres has no skewness/kurtosis aggregates, so compute the population
    // standardized moment (order `moment`) directly in SQL.
    fn pg_moment_pop_query(moment: i32, column: &str) -> String {
        format!("select sum(({column} - a.avg)^{moment}) / count({column}) / (stddev_pop({column})^{moment}) from test_table, (select avg({column}) from test_table) a")
    }

    // Sample variant of the standardized moment (n - 1 denominator, sample stddev).
    fn pg_moment_samp_query(moment: i32, column: &str) -> String {
        format!("select sum(({column} - a.avg)^{moment}) / (count({column}) - 1) / (stddev_samp({column})^{moment}) from test_table, (select avg({column}) from test_table) a")
    }

    // One fuzz iteration: load the generated values, then compare every
    // stats_agg accessor against its native-Postgres counterpart.
    fn test_aggs(state: &mut TestState) {
        Spi::connect_mut(|client| {
            client
                .update(
                    "CREATE TABLE test_table (test_x DOUBLE PRECISION, test_y DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            // Bulk insert via a single VALUES list built from the sample vectors.
            client
                .update(
                    &format!(
                        "INSERT INTO test_table VALUES {}",
                        state
                            .x_values
                            .iter()
                            .zip(state.y_values.iter())
                            .map(|(x, y)| "(".to_string()
                                + &x.to_string()
                                + ","
                                + &y.to_string()
                                + ")"
                                + ",")
                            .collect::()
                            .trim_end_matches(',')
                    ),
                    None,
                    &[],
                )
                .unwrap();

            // Definitions for allowed errors for different aggregates
            const NONE: f64 = 0.; // Exact match
            const EPS1: f64 = f64::EPSILON; // Generally enough to handle float rounding
            const EPS2: f64 = 2. * f64::EPSILON; // stddev is sqrt(variance), so a bit looser bound
            const EPS3: f64 = 3.
                * f64::EPSILON; // Sum of squares in variance agg accumulates a bit more error
            const BILLIONTH: f64 = 1e-9; // Higher order moments exponentially compound the error

            // 1D accessors vs. the native Postgres aggregates.
            check_agg_equivalence(state, client, &pg1d_aggx("avg"), &tk1d_agg("average"), NONE, true);
            check_agg_equivalence(state, client, &pg1d_aggx("sum"), &tk1d_agg("sum"), NONE, true);
            check_agg_equivalence(state, client, &pg1d_aggx("count"), &tk1d_agg("num_vals"), NONE, true);
            check_agg_equivalence(state, client, &pg1d_aggx("stddev"), &tk1d_agg("stddev"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggx("stddev_pop"), &tk1d_agg_arg("stddev", "population"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggx("stddev_samp"), &tk1d_agg_arg("stddev", "sample"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggx("variance"), &tk1d_agg("variance"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggx("var_pop"), &tk1d_agg_arg("variance", "population"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggx("var_samp"), &tk1d_agg_arg("variance", "sample"), EPS3, true);

            // 2D per-column accessors vs. regr_* / the per-column native aggregates.
            check_agg_equivalence(state, client, &pg2d_agg("regr_avgx"), &tk2d_agg("average_x"), NONE, true);
            check_agg_equivalence(state, client, &pg2d_agg("regr_avgy"), &tk2d_agg("average_y"), NONE, true);
            check_agg_equivalence(state, client, &pg1d_aggx("sum"), &tk2d_agg("sum_x"), NONE, true);
            check_agg_equivalence(state, client, &pg1d_aggy("sum"), &tk2d_agg("sum_y"), NONE, true);
            check_agg_equivalence(state, client, &pg1d_aggx("stddev"), &tk2d_agg("stddev_x"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggy("stddev"), &tk2d_agg("stddev_y"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggx("stddev_pop"), &tk2d_agg_arg("stddev_x", "population"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggy("stddev_pop"), &tk2d_agg_arg("stddev_y", "population"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggx("stddev_samp"), &tk2d_agg_arg("stddev_x", "sample"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggy("stddev_samp"), &tk2d_agg_arg("stddev_y", "sample"), EPS2, true);
            check_agg_equivalence(state, client, &pg1d_aggx("variance"), &tk2d_agg("variance_x"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggy("variance"), &tk2d_agg("variance_y"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggx("var_pop"), &tk2d_agg_arg("variance_x", "population"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggy("var_pop"), &tk2d_agg_arg("variance_y", "population"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggx("var_samp"), &tk2d_agg_arg("variance_x", "sample"), EPS3, true);
            check_agg_equivalence(state, client, &pg1d_aggy("var_samp"), &tk2d_agg_arg("variance_y", "sample"), EPS3, true);

            // Regression accessors vs. regr_*.
            check_agg_equivalence(state, client, &pg2d_agg("regr_count"), &tk2d_agg("num_vals"), NONE, true);
            check_agg_equivalence(state, client, &pg2d_agg("regr_slope"), &tk2d_agg("slope"), EPS1, true);
            check_agg_equivalence(state, client, &pg2d_agg("corr"), &tk2d_agg("corr"), EPS1, true);
            check_agg_equivalence(state, client, &pg2d_agg("regr_intercept"), &tk2d_agg("intercept"), EPS1, true);

            // No postgres equivalent for x_intercept, so we only test function vs. arrow operator.
            {
                let query = tk2d_agg("x_intercept");
                let (result, arrow_result) = client
                    .update(&query, None, &[])
                    .unwrap()
                    .first()
                    .get_two::()
                    .unwrap();
                assert_eq!(result, arrow_result, "Arrow didn't match in {query}");
            }

            check_agg_equivalence(state, client, &pg2d_agg("regr_r2"), &tk2d_agg("determination_coeff"), EPS1, true);
            check_agg_equivalence(state, client, &pg2d_agg("covar_pop"), &tk2d_agg_arg("covariance", "population"), BILLIONTH, true);
            check_agg_equivalence(state, client, &pg2d_agg("covar_samp"), &tk2d_agg_arg("covariance", "sample"), BILLIONTH, true);

            // Skewness and kurtosis don't have aggregate functions in postgres, but we can compute them
            check_agg_equivalence(state, client, &pg_moment_pop_query(3, "test_x"), &tk1d_agg_arg("skewness", "population"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_pop_query(3, "test_x"), &tk2d_agg_arg("skewness_x", "population"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_pop_query(3, "test_y"), &tk2d_agg_arg("skewness_y", "population"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_pop_query(4, "test_x"), &tk1d_agg_arg("kurtosis", "population"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_pop_query(4, "test_x"), &tk2d_agg_arg("kurtosis_x", "population"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_pop_query(4, "test_y"), &tk2d_agg_arg("kurtosis_y", "population"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_samp_query(3, "test_x"), &tk1d_agg_arg("skewness", "sample"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_samp_query(3, "test_x"), &tk2d_agg_arg("skewness_x", "sample"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_samp_query(3, "test_y"), &tk2d_agg_arg("skewness_y", "sample"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_samp_query(4, "test_x"), &tk1d_agg_arg("kurtosis", "sample"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_samp_query(4, "test_x"), &tk2d_agg_arg("kurtosis_x", "sample"), BILLIONTH, false);
            check_agg_equivalence(state, client, &pg_moment_samp_query(4, "test_y"), &tk2d_agg_arg("kurtosis_y", "sample"), BILLIONTH, false);

            client.update("DROP TABLE test_table", None, &[]).unwrap();
        });
    }

    // Checks stats_agg used as a moving (rolling) window aggregate: the first
    // window rows are degenerate (NaN stddev / NULL slope) and later rows are populated.
    #[pg_test]
    fn stats_agg_rolling() {
        Spi::connect_mut(|client| {
            client
                .update(
                    "
            SET timezone TO 'UTC';
            CREATE TABLE prices(ts TIMESTAMPTZ, price FLOAT);
            INSERT INTO prices
                ( WITH dates AS (SELECT * FROM generate_series('2020-01-01 00:00'::timestamp, '2020-02-01 12:00', '10 minutes') time)
                SELECT dates.time, (select (random()+EXTRACT(seconds FROM dates.time))*100 ) price FROM dates );
            ",
                    None,
                    &[],
                )
                .unwrap();

            let mut vals = client.update(
                "SELECT stddev(data.stats_agg) FROM (SELECT stats_agg(price) OVER (ORDER BY ts RANGE '50 minutes' PRECEDING) FROM prices) data",
                None, &[]
            ).unwrap();
            // Single-value window: stddev is NaN; subsequent windows have values.
            assert!(vals.next().unwrap()[1]
                .value::()
                .unwrap()
                .unwrap()
                .is_nan());
            assert!(vals.next().unwrap()[1].value::().unwrap().is_some());
            assert!(vals.next().unwrap()[1].value::().unwrap().is_some());

            let mut vals = client.update(
                "SELECT slope(data.stats_agg) FROM (SELECT stats_agg((EXTRACT(minutes FROM ts)), price) OVER (ORDER BY ts RANGE '50 minutes' PRECEDING) FROM prices) data;",
                None, &[]
            ).unwrap();
            assert!(vals.next().unwrap()[1].value::().unwrap().is_none()); // trendline is zero initially
            assert!(vals.next().unwrap()[1].value::().unwrap().is_some());
            assert!(vals.next().unwrap()[1].value::().unwrap().is_some());
        });
    }
}


================================================
FILE: extension/src/tdigest.rs
================================================
use std::{convert::TryInto, ops::Deref};

use pgrx::*;

use crate::{
    accessors::{
        AccessorApproxPercentile, AccessorApproxPercentileRank, AccessorMaxVal, AccessorMean,
        AccessorMinVal, AccessorNumVals,
    },
    aggregate_utils::in_aggregate_context,
    flatten,
    palloc::{Inner, Internal,
InternalAsValue, ToInternal}, pg_type, }; use tdigest::{Centroid, TDigest as InternalTDigest}; // PG function for adding values to a digest. // Null values are ignored. #[pg_extern(immutable, parallel_safe)] pub fn tdigest_trans( state: Internal, size: i32, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { tdigest_trans_inner(unsafe { state.to_inner() }, size, value, fcinfo).internal() } pub fn tdigest_trans_inner( state: Option>, size: i32, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let value = match value { None => return state, // NaNs are nonsensical in the context of a percentile, so exclude them Some(value) => { if value.is_nan() { return state; } else { value } } }; let mut state = match state { None => tdigest::Builder::with_size(size.try_into().unwrap()).into(), Some(state) => state, }; state.push(value); Some(state) }) } } // PG function for merging digests. #[pg_extern(immutable, parallel_safe)] pub fn tdigest_combine( state1: Internal, state2: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { unsafe { tdigest_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() } } pub fn tdigest_combine_inner( state1: Option>, state2: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state1, state2) { (None, None) => None, (None, Some(state2)) => Some(state2.clone().into()), (Some(state1), None) => Some(state1.clone().into()), (Some(state1), Some(state2)) => { let mut merged = state1.clone(); merged.merge(state2.clone()); Some(merged.into()) } }) } } use crate::raw::bytea; #[pg_extern(immutable, parallel_safe, strict)] pub fn tdigest_serialize(state: Internal) -> bytea { let mut state = state; let state: &mut tdigest::Builder = unsafe { state.get_mut().unwrap() }; // TODO this macro is really broken let hack = state.build(); let hackref = &hack; crate::do_serialize!(hackref) } #[pg_extern(strict, immutable, 
parallel_safe)]
pub fn tdigest_deserialize(bytes: bytea, _internal: Internal) -> Option {
    tdigest_deserialize_inner(bytes).internal()
}

// deserialfunc counterpart of tdigest_serialize.
pub fn tdigest_deserialize_inner(bytes: bytea) -> Inner {
    crate::do_deserialize!(bytes, tdigest::Builder)
}

// PG object for the digest.
// `centroids` is variable-length, sized by the computed `buckets` field.
pg_type! {
    #[derive(Debug)]
    struct TDigest<'input> {
        // We compute this. It's a (harmless) bug that we serialize it.
        #[serde(skip_deserializing)]
        buckets: u32,
        max_buckets: u32,
        count: u64,
        sum: f64,
        min: f64,
        max: f64,
        centroids: [Centroid; self.buckets],
    }
}

// Text I/O: the on-the-wire text form is RON, transcoded to/from the database
// encoding.
impl<'input> InOutFuncs for TDigest<'input> {
    fn output(&self, buffer: &mut StringInfo) {
        use crate::serialization::{str_to_db_encoding, EncodedStr::*};

        let stringified = ron::to_string(&**self).unwrap();
        match str_to_db_encoding(&stringified) {
            Utf8(s) => buffer.push_str(s),
            Other(s) => buffer.push_bytes(s.to_bytes()),
        }
    }

    fn input(input: &std::ffi::CStr) -> TDigest<'input>
    where
        Self: Sized,
    {
        use crate::serialization::str_from_db_encoding;

        let input = str_from_db_encoding(input);
        let mut val: TDigestData = ron::from_str(input).unwrap();
        // `buckets` is skipped on deserialize (see pg_type above); recompute it
        // from the parsed centroid list.
        val.buckets = val
            .centroids
            .len()
            .try_into()
            .expect("centroids len fits into u32");
        unsafe { Self(val, crate::type_builder::CachedDatum::None).flatten() }
    }
}

impl<'input> TDigest<'input> {
    // Rehydrate the crate-level digest from the flat PG representation.
    // NOTE(review): arguments pass `self.max` before `self.0.min` — confirm this
    // matches the parameter order of InternalTDigest::new.
    fn to_internal_tdigest(&self) -> InternalTDigest {
        InternalTDigest::new(
            self.centroids.iter().collect(),
            self.sum,
            self.count,
            self.max,
            self.0.min,
            self.max_buckets as usize,
        )
    }

    fn from_internal_tdigest(digest: &InternalTDigest) -> TDigest<'static> {
        let max_buckets: u32 = digest.max_size().try_into().unwrap();
        let centroids = digest.raw_centroids();

        // we need to flatten the vector to a single buffer that contains
        // both the size, the data, and the varlen header
        unsafe {
            flatten!(TDigest {
                max_buckets,
                buckets: centroids.len() as u32,
                count: digest.count(),
                sum: digest.sum(),
                min: digest.min(),
                max: digest.max(),
                centroids: centroids.into(),
            })
        }
    }
}

// PG function to generate a user-facing TDigest object from an internal tdigest::Builder.
#[pg_extern(immutable, parallel_safe)]
fn tdigest_final(state: Internal, fcinfo: pg_sys::FunctionCallInfo) -> Option> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let mut state = state;
            let state: &mut tdigest::Builder = match state.get_mut() {
                None => return None,
                Some(state) => state,
            };
            TDigest::from_internal_tdigest(&state.build()).into()
        })
    }
}

// SQL registration of the base `tdigest(size, value)` aggregate.
extension_sql!(
    "\n\
    CREATE AGGREGATE tdigest(size integer, value DOUBLE PRECISION)\n\
    (\n\
        sfunc = tdigest_trans,\n\
        stype = internal,\n\
        finalfunc = tdigest_final,\n\
        combinefunc = tdigest_combine,\n\
        serialfunc = tdigest_serialize,\n\
        deserialfunc = tdigest_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "tdigest_agg",
    requires = [
        tdigest_trans,
        tdigest_final,
        tdigest_combine,
        tdigest_serialize,
        tdigest_deserialize
    ],
);

// Transition function of `rollup(tdigest)`: merges already-built digests.
#[pg_extern(immutable, parallel_safe)]
pub fn tdigest_compound_trans(
    state: Internal,
    value: Option>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option {
    tdigest_compound_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal()
}

pub fn tdigest_compound_trans_inner(
    state: Option>,
    value: Option>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            match (state, value) {
                (a, None) => a,
                (None, Some(a)) => Some(a.to_internal_tdigest().into()),
                (Some(a), Some(b)) => {
                    // Only digests built with the same bucket budget may be rolled up.
                    assert_eq!(a.max_size(), b.max_buckets as usize);
                    Some(
                        InternalTDigest::merge_digests(
                            vec![a.deref().clone(), b.to_internal_tdigest()], // TODO: TDigest merge with self
                        )
                        .into(),
                    )
                }
            }
        })
    }
}

// Combine function of `rollup(tdigest)` for parallel aggregation.
#[pg_extern(immutable, parallel_safe)]
pub fn tdigest_compound_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option {
    unsafe {
        tdigest_compound_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal()
    }
}

pub fn tdigest_compound_combine_inner(
    state1: Option>,
    state2: Option>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            match (state1, state2) {
                (None, None) => None,
                (None, Some(state2)) => Some(state2.clone().into()),
                (Some(state1), None) => Some(state1.clone().into()),
                (Some(state1), Some(state2)) => {
                    // Mismatched bucket budgets cannot be merged.
                    assert_eq!(state1.max_size(), state2.max_size());
                    Some(
                        InternalTDigest::merge_digests(
                            vec![state1.deref().clone(), state2.deref().clone()], // TODO: TDigest merge with self
                        )
                        .into(),
                    )
                }
            }
        })
    }
}

// Final function of `rollup(tdigest)`: converts the merged internal digest
// into the user-facing flat TDigest.
#[pg_extern(immutable, parallel_safe)]
fn tdigest_compound_final(
    state: Internal,
    _fcinfo: pg_sys::FunctionCallInfo,
) -> Option> {
    let state: Option<&InternalTDigest> = unsafe { state.get() };
    state.map(TDigest::from_internal_tdigest)
}

// serialfunc for the rollup aggregate (state is already an InternalTDigest).
#[pg_extern(immutable, parallel_safe)]
fn tdigest_compound_serialize(state: Internal, _fcinfo: pg_sys::FunctionCallInfo) -> bytea {
    let state: Inner = unsafe { state.to_inner().unwrap() };
    crate::do_serialize!(state)
}

// deserialfunc for the rollup aggregate.
#[pg_extern(immutable, parallel_safe)]
pub fn tdigest_compound_deserialize(bytes: bytea, _internal: Internal) -> Option {
    let i: InternalTDigest = crate::do_deserialize!(bytes, InternalTDigest);
    Inner::from(i).internal()
}

// SQL registration of `rollup(tdigest)`.
extension_sql!(
    "\n\
    CREATE AGGREGATE rollup(\n\
        tdigest\n\
    ) (\n\
        sfunc = tdigest_compound_trans,\n\
        stype = internal,\n\
        finalfunc = tdigest_compound_final,\n\
        combinefunc = tdigest_compound_combine,\n\
        serialfunc = tdigest_compound_serialize,\n\
        deserialfunc = tdigest_compound_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "tdigest_rollup",
    requires = [
        tdigest_compound_trans,
        tdigest_compound_final,
        tdigest_compound_combine,
        tdigest_compound_serialize,
        tdigest_compound_deserialize
    ],
);

//---- Available PG operations on the digest

// `tdigest -> approx_percentile(p)` arrow sugar.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_tdigest_approx_percentile<'a>(
    sketch: TDigest<'a>,
    accessor: AccessorApproxPercentile,
) -> f64 {
    tdigest_quantile(accessor.percentile, sketch)
}

// Approximate the value at the given quantile (0.0-1.0)
#[pg_extern(immutable, parallel_safe, name = "approx_percentile")]
pub fn tdigest_quantile<'a>(quantile: f64, digest: TDigest<'a>) -> f64 {
    digest.to_internal_tdigest().estimate_quantile(quantile)
}

// `tdigest -> approx_percentile_rank(v)` arrow sugar.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_tdigest_approx_rank<'a>(
    sketch: TDigest<'a>,
    accessor: AccessorApproxPercentileRank,
) -> f64 {
    tdigest_quantile_at_value(accessor.value, sketch)
}

// Approximate the quantile at the given value
#[pg_extern(immutable, parallel_safe, name = "approx_percentile_rank")]
pub fn tdigest_quantile_at_value<'a>(value: f64, digest: TDigest<'a>) -> f64 {
    digest
        .to_internal_tdigest()
        .estimate_quantile_at_value(value)
}

// `tdigest -> num_vals()` arrow sugar.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_tdigest_num_vals<'a>(sketch: TDigest<'a>, _accessor: AccessorNumVals) -> f64 {
    tdigest_count(sketch)
}

// Number of elements from which the digest was built.
#[pg_extern(immutable, parallel_safe, name = "num_vals")]
pub fn tdigest_count<'a>(digest: TDigest<'a>) -> f64 {
    digest.count as f64
}

// `tdigest -> min_val()` arrow sugar.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_tdigest_min<'a>(sketch: TDigest<'a>, _accessor: AccessorMinVal) -> f64 {
    tdigest_min(sketch)
}

// Minimum value entered in the digest.
#[pg_extern(immutable, parallel_safe, name = "min_val")]
pub fn tdigest_min<'a>(digest: TDigest<'a>) -> f64 {
    digest.min
}

// `tdigest -> max_val()` arrow sugar.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_tdigest_max<'a>(sketch: TDigest<'a>, _accessor: AccessorMaxVal) -> f64 {
    tdigest_max(sketch)
}

// Maximum value entered in the digest.
#[pg_extern(immutable, parallel_safe, name = "max_val")]
pub fn tdigest_max<'a>(digest: TDigest<'a>) -> f64 {
    digest.max
}

// `tdigest -> mean()` arrow sugar.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_tdigest_mean<'a>(sketch: TDigest<'a>, _accessor: AccessorMean) -> f64 {
    tdigest_mean(sketch)
}

// Average of all the values entered in the digest.
// Note that this is not an approximation, though there may be loss of precision.
#[pg_extern(immutable, parallel_safe, name = "mean")] pub fn tdigest_mean<'a>(digest: TDigest<'a>) -> f64 { if digest.count > 0 { digest.sum / digest.count as f64 } else { 0.0 } } /// Total sum of all the values entered in the digest. #[pg_extern(immutable, parallel_safe, name = "total")] pub fn tdigest_sum(digest: TDigest<'_>) -> f64 { digest.sum } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; // Assert equality between two floats, within some fixed error range. fn apx_eql(value: f64, expected: f64, error: f64) { assert!( (value - expected).abs() < error, "Float value {value} differs from expected {expected} by more than {error}" ); } // Assert equality between two floats, within an error expressed as a fraction of the expected value. fn pct_eql(value: f64, expected: f64, pct_error: f64) { apx_eql(value, expected, pct_error * expected); } #[pg_test] fn test_tdigest_aggregate() { Spi::connect_mut(|client| { client .update("CREATE TABLE test (data DOUBLE PRECISION)", None, &[]) .unwrap(); client .update( "INSERT INTO test SELECT generate_series(0.01, 100, 0.01)", None, &[], ) .unwrap(); let sanity = client .update("SELECT COUNT(*) FROM test", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(10000, sanity.unwrap()); client .update( "CREATE VIEW digest AS \ SELECT tdigest(100, data) FROM test", None, &[], ) .unwrap(); let (min, max, count) = client .update( "SELECT \ min_val(tdigest), \ max_val(tdigest), \ num_vals(tdigest) \ FROM digest", None, &[], ) .unwrap() .first() .get_three::() .unwrap(); apx_eql(min.unwrap(), 0.01, 0.000001); apx_eql(max.unwrap(), 100.0, 0.000001); apx_eql(count.unwrap(), 10000.0, 0.000001); let (min2, max2, count2) = client .update( "SELECT \ tdigest->min_val(), \ tdigest->max_val(), \ tdigest->num_vals() \ FROM digest", None, &[], ) .unwrap() .first() .get_three::() .unwrap(); assert_eq!(min2, min); assert_eq!(max2, max); assert_eq!(count2, count); let (mean, mean2) = 
                client
                    .update(
                        "SELECT \
                    mean(tdigest), \
                    tdigest -> mean() FROM digest",
                        None,
                        &[],
                    )
                    .unwrap()
                    .first()
                    .get_two::()
                    .unwrap();

            apx_eql(mean.unwrap(), 50.005, 0.0001);
            assert_eq!(mean, mean2);

            // Scan every integer percentile; the estimate must be within 1% of
            // the true value (looser fixed bound at the 0th percentile).
            for i in 0..=100 {
                let value = i as f64;
                let quantile = value / 100.0;

                let (est_val, est_quant) = client
                    .update(
                        &format!(
                            "SELECT approx_percentile({quantile}, tdigest), \
                            approx_percentile_rank({value}, tdigest) \
                            FROM digest"
                        ),
                        None,
                        &[],
                    )
                    .unwrap()
                    .first()
                    .get_two::()
                    .unwrap();

                if i == 0 {
                    pct_eql(est_val.unwrap(), 0.01, 1.0);
                    apx_eql(est_quant.unwrap(), quantile, 0.0001);
                } else {
                    pct_eql(est_val.unwrap(), value, 1.0);
                    pct_eql(est_quant.unwrap(), quantile, 1.0);
                }

                // Arrow-operator forms must match the function-call results.
                let (est_val2, est_quant2) = client
                    .update(
                        &format!(
                            "SELECT tdigest->approx_percentile({quantile}), \
                            tdigest->approx_percentile_rank({value}) \
                            FROM digest"
                        ),
                        None,
                        &[],
                    )
                    .unwrap()
                    .first()
                    .get_two::()
                    .unwrap();
                assert_eq!(est_val2, est_val);
                assert_eq!(est_quant2, est_quant);
            }
        });
    }

    // With fewer points than buckets the digest is exact-ish: p99 of 1..=100 is 99.5.
    #[pg_test]
    fn test_tdigest_small_count() {
        Spi::connect_mut(|client| {
            let estimate = client
                .update(
                    "SELECT \
                        approx_percentile(\
                            0.99, \
                            tdigest(100, data)) \
                    FROM generate_series(1, 100) data;",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one()
                .unwrap();

            assert_eq!(estimate, Some(99.5));
        });
    }

    // The PG text output must match the crate's own Postgres formatting.
    #[pg_test]
    fn serialization_matches() {
        let mut t = InternalTDigest::new_with_size(10);
        let vals = vec![1.0, 1.0, 1.0, 2.0, 1.0, 1.0];
        for v in vals {
            t = t.merge_unsorted(vec![v]);
        }
        let pgt = TDigest::from_internal_tdigest(&t);

        let mut si = StringInfo::new();
        pgt.output(&mut si);
        assert_eq!(t.format_for_postgres(), si.to_string());
    }

    // Pins the exact RON text representation and checks that a digest parsed
    // from text answers percentile queries.
    #[pg_test]
    fn test_tdigest_io() {
        Spi::connect_mut(|client| {
            let output = client
                .update(
                    "SELECT \
                    tdigest(100, data)::text \
                    FROM generate_series(1, 100) data;",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap();

            let expected = "(version:1,buckets:88,max_buckets:100,count:100,sum:5050,min:1,max:100,centroids:[(mean:1,weight:1),(mean:2,weight:1),(mean:3,weight:1),(mean:4,weight:1),(mean:5,weight:1),(mean:6,weight:1),(mean:7,weight:1),(mean:8,weight:1),(mean:9,weight:1),(mean:10,weight:1),(mean:11,weight:1),(mean:12,weight:1),(mean:13,weight:1),(mean:14,weight:1),(mean:15,weight:1),(mean:16,weight:1),(mean:17,weight:1),(mean:18,weight:1),(mean:19,weight:1),(mean:20,weight:1),(mean:21,weight:1),(mean:22,weight:1),(mean:23,weight:1),(mean:24,weight:1),(mean:25,weight:1),(mean:26,weight:1),(mean:27,weight:1),(mean:28,weight:1),(mean:29,weight:1),(mean:30,weight:1),(mean:31,weight:1),(mean:32,weight:1),(mean:33,weight:1),(mean:34,weight:1),(mean:35,weight:1),(mean:36,weight:1),(mean:37,weight:1),(mean:38,weight:1),(mean:39,weight:1),(mean:40,weight:1),(mean:41,weight:1),(mean:42,weight:1),(mean:43,weight:1),(mean:44,weight:1),(mean:45,weight:1),(mean:46,weight:1),(mean:47,weight:1),(mean:48,weight:1),(mean:49,weight:1),(mean:50,weight:1),(mean:51,weight:1),(mean:52.5,weight:2),(mean:54.5,weight:2),(mean:56.5,weight:2),(mean:58.5,weight:2),(mean:60.5,weight:2),(mean:62.5,weight:2),(mean:64,weight:1),(mean:65.5,weight:2),(mean:67.5,weight:2),(mean:69,weight:1),(mean:70.5,weight:2),(mean:72,weight:1),(mean:73.5,weight:2),(mean:75,weight:1),(mean:76,weight:1),(mean:77.5,weight:2),(mean:79,weight:1),(mean:80,weight:1),(mean:81.5,weight:2),(mean:83,weight:1),(mean:84,weight:1),(mean:85,weight:1),(mean:86,weight:1),(mean:87,weight:1),(mean:88,weight:1),(mean:89,weight:1),(mean:90,weight:1),(mean:91,weight:1),(mean:92,weight:1),(mean:93,weight:1),(mean:94,weight:1),(mean:95,weight:1),(mean:96,weight:1),(mean:97,weight:1),(mean:98,weight:1),(mean:99,weight:1),(mean:100,weight:1)])";

            assert_eq!(output, Some(expected.into()));

            let estimate = client
                .update(
                    &format!("SELECT approx_percentile(0.90, '{expected}'::tdigest)"),
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one()
                .unwrap();
            assert_eq!(estimate, Some(90.5));
        });
    }

    // Pins the exact binary (serialfunc) format and checks byte round-tripping.
    #[pg_test]
    fn test_tdigest_byte_io() {
        unsafe {
            use std::ptr;
            let state = tdigest_trans_inner(None, 100, Some(14.0), ptr::null_mut());
            let state = tdigest_trans_inner(state, 100, Some(18.0), ptr::null_mut());
            let state = tdigest_trans_inner(state, 100, Some(22.7), ptr::null_mut());
            let state = tdigest_trans_inner(state, 100, Some(39.42), ptr::null_mut());
            let state = tdigest_trans_inner(state, 100, Some(-43.0), ptr::null_mut());

            let mut control = state.unwrap();
            let buffer = tdigest_serialize(Inner::from(control.clone()).internal().unwrap());
            let buffer = pgrx::varlena::varlena_to_byte_slice(buffer.0.cast_mut_ptr());

            let expected = [
                1, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 69, 192, 1, 0, 0, 0, 0, 0, 0,
                0, 0, 0, 0, 0, 0, 0, 44, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 64, 1,
                0, 0, 0, 0, 0, 0, 0, 51, 51, 51, 51, 51, 179, 54, 64, 1, 0, 0, 0, 0, 0, 0, 0,
                246, 40, 92, 143, 194, 181, 67, 64, 1, 0, 0, 0, 0, 0, 0, 0, 100, 0, 0, 0, 0, 0, 0,
                0, 144, 194, 245, 40, 92, 143, 73, 64, 5, 0, 0, 0, 0, 0, 0, 0, 246, 40, 92, 143,
                194, 181, 67, 64, 0, 0, 0, 0, 0, 128, 69, 192,
            ];
            assert_eq!(buffer, expected);

            let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected);
            let mut new_state =
                tdigest_deserialize_inner(bytea(pg_sys::Datum::from(expected.as_ptr())));

            // Compare built digests: Builder states may differ internally.
            assert_eq!(new_state.build(), control.build());
        }
    }

    // rollup() over per-device digests (test continues past the end of this chunk).
    #[pg_test]
    fn test_tdigest_compound_agg() {
        Spi::connect_mut(|client| {
            client
                .update(
                    "CREATE TABLE new_test (device INTEGER, value DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client.update("INSERT INTO new_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, &[]).unwrap();

            let sanity = client
                .update("SELECT COUNT(*) FROM new_test", None, &[])
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            assert_eq!(Some(1010), sanity);

            client
                .update(
                    "CREATE VIEW digests AS \
                    SELECT device, tdigest(20, value) \
                    FROM new_test \
                    GROUP BY device",
                    None,
                    &[],
                )
                .unwrap();

            client
                .update(
                    "CREATE VIEW 
composite AS \
                SELECT tdigest(tdigest) \
                FROM digests",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    "CREATE VIEW base AS \
                SELECT tdigest(20, value) \
                FROM new_test",
                    None,
                    &[],
                )
                .unwrap();
            // Percentile from the digest built directly over the raw rows.
            let value = client
                .update(
                    "SELECT \
                approx_percentile(0.9, tdigest) \
                FROM base",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            // Percentile from the rollup (digest of per-device digests).
            let test_value = client
                .update(
                    "SELECT \
                approx_percentile(0.9, tdigest) \
                FROM composite",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::()
                .unwrap();
            // The digest-of-digests rollup must agree (within loose tolerance)
            // with the digest built in a single pass over the raw rows.
            apx_eql(test_value.unwrap(), value.unwrap(), 0.1);
            apx_eql(test_value.unwrap(), 9.0, 0.1);
        });
    }
}


================================================
FILE: extension/src/time_vector/iter.rs
================================================
use tspoint::TSPoint;

use Iter::*;

/// Iterator over the points of a timevector.
/// Only one storage form exists today (a flat-serialized slice of `TSPoint`s),
/// but the enum leaves room for additional representations later.
pub enum Iter<'a> {
    Slice {
        iter: flat_serialize::Iter<'a, 'a, TSPoint>,
    },
}

impl<'a> Iterator for Iter<'a> {
    type Item = TSPoint;

    // NOTE(review): the return types below (`Option`, `(usize, Option)`) look
    // like their type parameters were stripped during extraction; presumably
    // `Option<Self::Item>` and `(usize, Option<usize>)` — confirm upstream.
    fn next(&mut self) -> Option {
        match self {
            Slice { iter } => iter.next(),
        }
    }

    // XXX the functions below, `last()` and `count()` in particular rely on
    // this being precise and accurate, with both elements of the tuple
    // being the same as the actual yielded number of elements, if this
    // changes those will also need to change
    fn size_hint(&self) -> (usize, Option) {
        match self {
            Slice { iter } => (iter.len(), Some(iter.len())),
        }
    }

    // O(1) override: trusts the exact `size_hint` above instead of draining
    // the iterator element-by-element.
    fn count(self) -> usize
    where
        Self: Sized,
    {
        self.size_hint().0
    }
}


================================================
FILE: extension/src/time_vector/pipeline/aggregation.rs
================================================
use std::mem::take;

use pgrx::*;

use counter_agg::CounterSummaryBuilder;

use super::*;
use crate::{
    accessors::{AccessorAverage, AccessorNumVals, AccessorSum},
    build,
    counter_agg::CounterSummary,
    hyperloglog::HyperLogLog,
    pg_type, ron_inout_funcs,
    stats_agg::{self, InternalStatsSummary1D, StatsSummary1D},
    uddsketch::UddSketch,
};

use self::toolkit_experimental::{
    PipelineThenAverage, PipelineThenAverageData, PipelineThenCounterAgg,
PipelineThenCounterAggData, PipelineThenHyperLogLog, PipelineThenHyperLogLogData, PipelineThenNumVals, PipelineThenNumValsData, PipelineThenPercentileAgg, PipelineThenPercentileAggData, PipelineThenStatsAgg, PipelineThenStatsAggData, PipelineThenSum, PipelineThenSumData, }; #[pg_schema] pub mod toolkit_experimental { use super::*; pub(crate) use crate::time_vector::pipeline::UnstableTimevectorPipeline; pg_type! { #[derive(Debug)] struct PipelineThenStatsAgg<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenStatsAgg<'input>); pg_type! { #[derive(Debug)] struct PipelineThenSum<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenSum<'input>); pg_type! { #[derive(Debug)] struct PipelineThenAverage<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenAverage<'input>); pg_type! { #[derive(Debug)] struct PipelineThenNumVals<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenNumVals<'input>); pg_type! { #[derive(Debug)] struct PipelineThenCounterAgg<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenCounterAgg<'input>); pg_type! { #[derive(Debug)] struct PipelineThenHyperLogLog<'input> { hll_size: u64, num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenHyperLogLog<'input>); pg_type! 
    {
        #[derive(Debug)]
        struct PipelineThenPercentileAgg<'input> {
            num_elements: u64,
            elements: [Element<'input>; self.num_elements],
        }
    }
    ron_inout_funcs!(PipelineThenPercentileAgg<'input>);
}

/// `timevector -> (pipeline -> stats_agg())`: run the pipeline elements over
/// the timevector, then fold every resulting value into a 1-D stats summary.
///
/// # Panics
/// Panics if the timevector contains nulls, since the stats aggregate has no
/// null-handling semantics here.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_run_pipeline_then_stats_agg<'a>(
    mut timevector: Timevector_TSTZ_F64<'a>,
    pipeline: toolkit_experimental::PipelineThenStatsAgg<'a>,
) -> StatsSummary1D {
    if timevector.has_nulls() {
        panic!("Unable to compute stats aggregate over timevector containing nulls");
    }
    timevector = run_pipeline_elements(timevector, pipeline.elements.iter());
    let mut stats = InternalStatsSummary1D::new();
    for TSPoint { val, .. } in timevector.iter() {
        stats.accum(val).expect("error while running stats_agg");
    }
    StatsSummary1D::from_internal(stats)
}

/// Fuse a timevector pipeline with a `PipelineThenStatsAgg` terminator:
/// the pipeline's elements come first, followed by any elements already held
/// by `then_stats_agg`. Used by the planner-support machinery to fold
/// `series -> pipeline -> stats_agg()` into a single node.
#[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")]
pub fn finalize_with_stats_agg<'e>(
    mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>,
    then_stats_agg: toolkit_experimental::PipelineThenStatsAgg<'e>,
) -> toolkit_experimental::PipelineThenStatsAgg<'e> {
    if then_stats_agg.num_elements == 0 {
        // flatten immediately so we don't need a temporary allocation for elements
        return unsafe {
            flatten! {
                PipelineThenStatsAgg {
                    num_elements: pipeline.0.num_elements,
                    elements: pipeline.0.elements,
                }
            }
        };
    }
    // General case: move the pipeline's element vector out (leaving a default
    // in its place) and append the terminator's elements to it.
    let mut elements = take(pipeline.elements.as_owned());
    elements.extend(then_stats_agg.elements.iter());
    build! {
        PipelineThenStatsAgg {
            num_elements: elements.len().try_into().unwrap(),
            elements: elements.into(),
        }
    }
}

/// SQL-visible `stats_agg()` constructor: a zero-element terminator whose
/// elements are filled in later by the support function above.
#[pg_extern(
    immutable,
    parallel_safe,
    name = "stats_agg",
    schema = "toolkit_experimental"
)]
pub fn pipeline_stats_agg() -> toolkit_experimental::PipelineThenStatsAgg<'static> {
    build!
{ PipelineThenStatsAgg { num_elements: 0, elements: vec![].into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_stats_agg_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenStatsAgg::from_polymorphic_datum(new_element, false, pg_sys::Oid::INVALID) .unwrap(); finalize_with_stats_agg(old_pipeline, new_element) .into_datum() .unwrap() }) } // using this instead of pg_operator since the latter doesn't support schemas yet // FIXME there is no CREATE OR REPLACE OPERATOR need to update post-install.rs // need to ensure this works with out unstable warning extension_sql!( r#" ALTER FUNCTION "arrow_run_pipeline_then_stats_agg" SUPPORT toolkit_experimental.pipeline_stats_agg_support; "#, name = "pipeline_stats_agg_support", requires = [pipeline_stats_agg_support], ); #[pg_extern( immutable, parallel_safe, name = "sum_cast", schema = "toolkit_experimental" )] pub fn sum_pipeline_element<'a>( accessor: AccessorSum, ) -> toolkit_experimental::PipelineThenSum<'static> { let _ = accessor; build! { PipelineThenSum { num_elements: 0, elements: vec![].into(), } } } extension_sql!( r#" CREATE CAST (AccessorSum AS toolkit_experimental.PipelineThenSum) WITH FUNCTION toolkit_experimental.sum_cast AS IMPLICIT; "#, name = "sum_pipe_cast", requires = [AccessorSum, PipelineThenSum, sum_pipeline_element], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_pipeline_then_sum<'a>( timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenSum<'a>, ) -> Option { let pipeline = pipeline.0; let pipeline = build! 
{ PipelineThenStatsAgg { num_elements: pipeline.num_elements, elements: pipeline.elements, } }; let stats_agg = arrow_run_pipeline_then_stats_agg(timevector, pipeline); stats_agg::stats1d_sum(stats_agg) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn finalize_with_sum<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_stats_agg: toolkit_experimental::PipelineThenSum<'e>, ) -> toolkit_experimental::PipelineThenSum<'e> { if then_stats_agg.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! { PipelineThenSum { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_stats_agg.elements.iter()); build! { PipelineThenSum { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_sum_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenSum::from_polymorphic_datum(new_element, false, pg_sys::Oid::INVALID) .unwrap(); finalize_with_sum(old_pipeline, new_element) .into_datum() .unwrap() }) } extension_sql!( r#" ALTER FUNCTION "arrow_pipeline_then_sum" SUPPORT toolkit_experimental.pipeline_sum_support; "#, name = "arrow_then_sum_support", requires = [pipeline_sum_support], ); #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn average_pipeline_element( accessor: AccessorAverage, ) -> toolkit_experimental::PipelineThenAverage<'static> { let _ = accessor; build! 
{ PipelineThenAverage { num_elements: 0, elements: vec![].into(), } } } extension_sql!( r#" CREATE CAST (AccessorAverage AS toolkit_experimental.PipelineThenAverage) WITH FUNCTION toolkit_experimental.average_pipeline_element AS IMPLICIT; "#, name = "avg_pipe_cast", requires = [ AccessorAverage, PipelineThenAverage, average_pipeline_element ], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_pipeline_then_average<'a>( timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenAverage<'a>, ) -> Option { let pipeline = pipeline.0; let pipeline = build! { PipelineThenStatsAgg { num_elements: pipeline.num_elements, elements: pipeline.elements, } }; let stats_agg = arrow_run_pipeline_then_stats_agg(timevector, pipeline); stats_agg::stats1d_average(stats_agg) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn finalize_with_average<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_stats_agg: toolkit_experimental::PipelineThenAverage<'e>, ) -> toolkit_experimental::PipelineThenAverage<'e> { if then_stats_agg.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! { PipelineThenAverage { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_stats_agg.elements.iter()); build! 
{ PipelineThenAverage { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_average_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenAverage::from_polymorphic_datum(new_element, false, pg_sys::Oid::INVALID) .unwrap(); finalize_with_average(old_pipeline, new_element) .into_datum() .unwrap() }) } extension_sql!( r#" ALTER FUNCTION "arrow_pipeline_then_average" SUPPORT toolkit_experimental.pipeline_average_support; "#, name = "pipe_avg_support", requires = [pipeline_average_support], ); #[pg_extern( immutable, parallel_safe, name = "num_vals_cast", schema = "toolkit_experimental" )] pub fn num_vals_pipeline_element( accessor: AccessorNumVals, ) -> toolkit_experimental::PipelineThenNumVals<'static> { let _ = accessor; build! { PipelineThenNumVals { num_elements: 0, elements: vec![].into(), } } } extension_sql!( r#" CREATE CAST (AccessorNumVals AS toolkit_experimental.PipelineThenNumVals) WITH FUNCTION toolkit_experimental.num_vals_cast AS IMPLICIT; "#, name = "num_vals_pipe_cast", requires = [ AccessorNumVals, PipelineThenNumVals, num_vals_pipeline_element ], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_pipeline_then_num_vals<'a>( timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenNumVals<'a>, ) -> i64 { run_pipeline_elements(timevector, pipeline.elements.iter()).num_vals() as _ } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn finalize_with_num_vals<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_stats_agg: toolkit_experimental::PipelineThenNumVals<'e>, ) -> toolkit_experimental::PipelineThenNumVals<'e> { if then_stats_agg.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! 
{ PipelineThenNumVals { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_stats_agg.elements.iter()); build! { PipelineThenNumVals { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_num_vals_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenNumVals::from_polymorphic_datum(new_element, false, pg_sys::Oid::INVALID) .unwrap(); finalize_with_num_vals(old_pipeline, new_element) .into_datum() .unwrap() }) } extension_sql!( r#" ALTER FUNCTION "arrow_pipeline_then_num_vals" SUPPORT toolkit_experimental.pipeline_num_vals_support; "#, name = "pipe_then_num_vals", requires = [pipeline_num_vals_support], ); // TODO support gauge #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_run_pipeline_then_counter_agg<'a>( mut timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenCounterAgg<'a>, ) -> Option { timevector = run_pipeline_elements(timevector, pipeline.elements.iter()); if timevector.num_points() == 0 { return None; } let mut it = timevector.iter(); let mut summary = CounterSummaryBuilder::new(&it.next().unwrap(), None); for point in it { summary .add_point(&point) .expect("error while running counter_agg"); } Some(CounterSummary::from_internal_counter_summary( summary.build(), )) } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn finalize_with_counter_agg<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_counter_agg: toolkit_experimental::PipelineThenCounterAgg<'e>, ) -> toolkit_experimental::PipelineThenCounterAgg<'e> { if then_counter_agg.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! 
{ PipelineThenCounterAgg { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_counter_agg.elements.iter()); build! { PipelineThenCounterAgg { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_extern( immutable, parallel_safe, name = "counter_agg", schema = "toolkit_experimental" )] pub fn pipeline_counter_agg() -> toolkit_experimental::PipelineThenCounterAgg<'static> { build! { PipelineThenCounterAgg { num_elements: 0, elements: vec![].into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_counter_agg_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenCounterAgg::from_polymorphic_datum( new_element, false, pg_sys::Oid::INVALID, ) .unwrap(); finalize_with_counter_agg(old_pipeline, new_element) .into_datum() .unwrap() }) } // using this instead of pg_operator since the latter doesn't support schemas yet // FIXME there is no CREATE OR REPLACE OPERATOR need to update post-install.rs // need to ensure this works with out unstable warning extension_sql!( r#" ALTER FUNCTION "arrow_run_pipeline_then_counter_agg" SUPPORT toolkit_experimental.pipeline_counter_agg_support; "#, name = "pipe_then_counter_agg", requires = [pipeline_counter_agg_support], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_run_pipeline_then_hyperloglog<'a>( mut timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenHyperLogLog<'a>, ) -> HyperLogLog<'static> { timevector = run_pipeline_elements(timevector, pipeline.elements.iter()); HyperLogLog::build_from( pipeline.hll_size as i32, PgBuiltInOids::FLOAT8OID.into(), None, timevector .iter() .map(|point| point.val.into_datum().unwrap()), ) } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn 
finalize_with_hyperloglog<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_hyperloglog: toolkit_experimental::PipelineThenHyperLogLog<'e>, ) -> toolkit_experimental::PipelineThenHyperLogLog<'e> { if then_hyperloglog.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! { PipelineThenHyperLogLog { hll_size: then_hyperloglog.hll_size, num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_hyperloglog.elements.iter()); build! { PipelineThenHyperLogLog { hll_size: then_hyperloglog.hll_size, num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_extern( immutable, parallel_safe, name = "hyperloglog", schema = "toolkit_experimental" )] pub fn pipeline_hyperloglog(size: i32) -> toolkit_experimental::PipelineThenHyperLogLog<'static> { build! { PipelineThenHyperLogLog { hll_size: size as u64, num_elements: 0, elements: vec![].into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_hyperloglog_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenHyperLogLog::from_polymorphic_datum( new_element, false, pg_sys::Oid::INVALID, ) .unwrap(); finalize_with_hyperloglog(old_pipeline, new_element) .into_datum() .unwrap() }) } // using this instead of pg_operator since the latter doesn't support schemas yet // FIXME there is no CREATE OR REPLACE OPERATOR need to update post-install.rs // need to ensure this works with out unstable warning extension_sql!( r#" ALTER FUNCTION "arrow_run_pipeline_then_hyperloglog" SUPPORT toolkit_experimental.pipeline_hyperloglog_support; "#, name = "pipe_then_hll", requires = [pipeline_hyperloglog_support], ); #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn 
arrow_run_pipeline_then_percentile_agg<'a>( mut timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenPercentileAgg<'a>, ) -> UddSketch<'static> { timevector = run_pipeline_elements(timevector, pipeline.elements.iter()); UddSketch::from_iter(timevector.into_iter().map(|p| p.val)) } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub fn finalize_with_percentile_agg<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_hyperloglog: toolkit_experimental::PipelineThenPercentileAgg<'e>, ) -> toolkit_experimental::PipelineThenPercentileAgg<'e> { if then_hyperloglog.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! { PipelineThenPercentileAgg { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_hyperloglog.elements.iter()); build! { PipelineThenPercentileAgg { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_extern( immutable, parallel_safe, name = "percentile_agg", schema = "toolkit_experimental" )] pub fn pipeline_percentile_agg() -> toolkit_experimental::PipelineThenPercentileAgg<'static> { build! 
{ PipelineThenPercentileAgg { num_elements: 0, elements: vec![].into(), } } } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_percentile_agg_support(input: Internal) -> Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineThenPercentileAgg::from_polymorphic_datum( new_element, false, pg_sys::Oid::INVALID, ) .unwrap(); finalize_with_percentile_agg(old_pipeline, new_element) .into_datum() .unwrap() }) } // using this instead of pg_operator since the latter doesn't support schemas yet // FIXME there is no CREATE OR REPLACE OPERATOR need to update post-install.rs // need to ensure this works with out unstable warning extension_sql!( r#" ALTER FUNCTION "arrow_run_pipeline_then_percentile_agg" SUPPORT toolkit_experimental.pipeline_percentile_agg_support; "#, name = "pipe_then_percentile", requires = [pipeline_percentile_agg_support], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_stats_agg_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> stats_agg())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,n:5,sx:100,sx2:250,sx3:0,sx4:21250)" ); }); } #[pg_test] fn test_stats_agg_pipeline_folding() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> stats_agg() -> average();", None, &[], ) .unwrap() .nth(1) .unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: (\ arrow_run_pipeline_then_stats_agg(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \ '(version:1,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethenstatsagg\ ) -> '(version:1)'::accessoraverage)"); }); } #[pg_test] fn test_sum_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> sum())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), "100"); }); } #[pg_test] fn test_sum_pipeline_folding() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> sum();", None, &[], ) .unwrap() .nth(1) .unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: \ arrow_pipeline_then_sum(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \ '(version:1,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethensum\ )"); }); } #[pg_test] fn test_average_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> average())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), "20"); }); } #[pg_test] fn test_average_pipeline_folding() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> average();", None, &[], ) .unwrap() .nth(1) .unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: \ arrow_pipeline_then_average(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \ '(version:1,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethenaverage\ )"); }); } #[pg_test] fn test_num_vals_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
// NOTE(review): this chunk was flattened during extraction — each long line
// below is many original source lines joined by spaces; stripped generic
// arguments (e.g. `.get_one::()`) and `//` comments that now swallow the code
// following them on the same physical line mean this region does not compile
// as-is. Code is left byte-identical; only these reviewer comments are added.
//
// Tail of a finalizer test: fetch the current search_path, prepend
// toolkit_experimental to it (the "search path trick") so the experimental
// `->` pipeline operators resolve unqualified, then assert that
// `series -> num_vals()` over a 5-point timevector renders as "5".
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> num_vals())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), "5"); }); } #[pg_test] fn test_num_vals_pipeline_folding() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise.
// test_num_vals_pipeline_folding (body): asserts on EXPLAIN VERBOSE output to
// prove the planner folded `-> ceil() -> abs() -> floor() -> num_vals()` into
// one arrow_pipeline_then_num_vals call with a serialized 3-element pipeline
// literal. test_counter_agg_finalizer begins at the end of this line.
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> num_vals();", None, &[], ) .unwrap() .nth(1) .unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: \ arrow_pipeline_then_num_vals(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \ '(version:1,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethennumvals\ )"); }); } #[pg_test] fn test_counter_agg_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise.
// test_counter_agg_finalizer (body): builds an unsorted 5-point series, runs
// `-> sort() -> counter_agg()` and asserts the full serialized counter
// aggregate; then checks extrapolated_delta('prometheus') over a one-month
// bound is ~67.5, and finally checks the pipeline-folding EXPLAIN output for
// `-> counter_agg()`.
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!( "SELECT (series -> sort() -> counter_agg())::TEXT FROM ({create_series}) s" ), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), "(version:1,stats:(n:5,sx:3156624000,sx2:74649600000,sx3:0,sx4:1894671345254400000000,sy:215,sy2:2280,sy3:6720.000000000007,sy4:1788960,sxy:12960000),first:(ts:\"2020-01-01 00:00:00+00\",val:15),second:(ts:\"2020-01-02 00:00:00+00\",val:25),penultimate:(ts:\"2020-01-04 00:00:00+00\",val:10),last:(ts:\"2020-01-05 00:00:00+00\",val:30),reset_sum:45,num_resets:2,num_changes:4,bounds:(is_present:0,has_left:0,has_right:0,padding:(0,0,0,0,0),left:None,right:None))"); let val = client.update( &format!("SELECT series -> sort() -> counter_agg() -> with_bounds('[2020-01-01 UTC, 2020-02-01 UTC)') -> extrapolated_delta('prometheus') FROM ({create_series}) s"), None, &[] ) .unwrap().first() .get_one::().unwrap().unwrap(); assert!((val - 67.5).abs() < f64::EPSILON); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> counter_agg();", None, &[], ) .unwrap() .nth(1) .unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: \ arrow_run_pipeline_then_counter_agg(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \
// Expected folded-pipeline literal for the counter_agg EXPLAIN assertion;
// test_hyperloglog_finalizer begins here: 10-point series (7 distinct
// values), asserts the serialized sparse hyperloglog, distinct_count() == 7,
// and the folded EXPLAIN output for `-> hyperloglog(100)`.
'(version:1,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethencounteragg\ )"); }) } #[pg_test] fn test_hyperloglog_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0), \ ('2020-01-06 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-07 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-08 UTC'::TIMESTAMPTZ, 35.0), \ ('2020-01-09 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-10 UTC'::TIMESTAMPTZ, 5.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> hyperloglog(100))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), "(version:1,log:Sparse(num_compressed:7,element_type:FLOAT8,collation:None,compressed_bytes:28,precision:7,compressed:[136,188,20,7,8,30,244,43,72,69,89,2,72,255,97,27,72,83,248,27,200,110,35,5,8,37,85,12]))"); let val = client .update( &format!( "SELECT series -> hyperloglog(100) -> distinct_count() FROM ({create_series}) s" ), None, &[] ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(val, 7); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> hyperloglog(100);", None, &[], ) .unwrap() .nth(1)
// Tail of the hyperloglog EXPLAIN assertion (note hll_size:100 is carried in
// the folded pipeline literal); test_percentile_agg_finalizer: asserts the
// serialized UddSketch produced by `-> percentile_agg()`;
// test_percentile_agg_pipeline_folding begins at the end of this line.
.unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: \ arrow_run_pipeline_then_hyperloglog(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \ '(version:1,hll_size:100,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethenhyperloglog\ )"); }) } #[pg_test] fn test_percentile_agg_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> percentile_agg())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,\ alpha:0.001,\ max_buckets:200,\ num_buckets:5,\ compactions:0,\ count:5,\ sum:100,\ buckets:[\ (Positive(1152),1),\ (Positive(1355),1),\ (Positive(1498),1),\ (Positive(1610),1),\ (Positive(1701),1)\ ]\ )", ); }); } #[pg_test] fn test_percentile_agg_pipeline_folding() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise.
// Tail of test_percentile_agg_pipeline_folding: EXPLAIN VERBOSE must show a
// single arrow_run_pipeline_then_percentile_agg call over the folded
// ceil/abs/floor pipeline. The `FILE:` banner below is an extraction artifact
// marking the start of extension/src/time_vector/pipeline/arithmetic.rs:
// `Function` enumerates the arithmetic pipeline elements; the explicit
// discriminants are visible in the serialized form (per the inline XXX), so
// variant order/values must never change. `apply` begins here, dispatching
// each variant to an fn(f64, f64) -> f64 closure.
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); let output = client .update( "EXPLAIN (verbose) SELECT \ timevector('1930-04-05'::timestamptz, 123.0) \ -> ceil() -> abs() -> floor() \ -> percentile_agg();", None, &[], ) .unwrap() .nth(1) .unwrap() .get_datum_by_ordinal(1) .unwrap() .value::() .unwrap() .unwrap(); assert_eq!(output.trim(), "Output: \ arrow_run_pipeline_then_percentile_agg(\ timevector('1930-04-05 00:00:00+00'::timestamp with time zone, '123'::double precision), \ '(version:1,num_elements:3,elements:[\ Arithmetic(function:Ceil,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Floor,rhs:0)\ ])'::pipelinethenpercentileagg\ )"); }); } } ================================================ FILE: extension/src/time_vector/pipeline/arithmetic.rs ================================================ use pgrx::*; use super::*; use super::Element::Arithmetic; use Function::*; #[derive( Debug, Copy, Clone, flat_serialize_macro::FlatSerializable, serde::Serialize, serde::Deserialize, )] #[repr(u64)] //XXX note that the order here _is_ significant; it can be visible in the // serialized form pub enum Function { // binary functions Add = 1, Sub = 2, Mul = 3, Div = 4, Mod = 5, Power = 6, LogN = 7, // Unary functions Abs, Cbrt, Ceil, Floor, Ln, Log10, Round, // nearest Sign, Sqrt, Trunc, } pub fn apply( mut series: Timevector_TSTZ_F64<'_>, function: Function, rhs: f64, ) -> Timevector_TSTZ_F64<'_> { let function: fn(f64, f64) -> f64 = match function { Add => |a, b| a + b, Sub => |a, b| a - b, Mul => |a, b| a * b, Div => |a, b| a / b, // TODO is this the right mod?
// Remainder of `apply`: unary variants ignore their second argument; the
// selected closure is then mapped over every point of the series in place via
// map::map_series. Note `%` here is Rust's remainder (sign follows the
// dividend) — the inline TODO questions whether that matches the intended SQL
// mod semantics. Below: #[pg_extern] constructors for the binary pipeline
// elements (add/sub/mul/div/mod/power), each flattening a one-element
// Arithmetic pipeline carrying `rhs`; `logn` gets a distinct SQL name because
// log(double) already means log base 10.
Mod => |a, b| a % b, Power => |a, b| a.powf(b), LogN => |a, b| a.log(b), // unary functions just ignore the second arg Abs => |a, _| a.abs(), Cbrt => |a, _| a.cbrt(), Ceil => |a, _| a.ceil(), Floor => |a, _| a.floor(), Ln => |a, _| a.ln(), Log10 => |a, _| a.log10(), Round => |a, _| a.round(), Sign => |a, _| a.signum(), Sqrt => |a, _| a.sqrt(), Trunc => |a, _| a.trunc(), }; map::map_series(&mut series, |lhs| function(lhs, rhs)); series } // // binary operations // #[pg_extern( immutable, parallel_safe, name = "add", schema = "toolkit_experimental" )] pub fn pipeline_add(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Add, rhs }.flatten() } #[pg_extern( immutable, parallel_safe, name = "sub", schema = "toolkit_experimental" )] pub fn pipeline_sub(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Sub, rhs }.flatten() } #[pg_extern( immutable, parallel_safe, name = "mul", schema = "toolkit_experimental" )] pub fn pipeline_mul(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Mul, rhs }.flatten() } #[pg_extern( immutable, parallel_safe, name = "div", schema = "toolkit_experimental" )] pub fn pipeline_div(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Div, rhs }.flatten() } #[pg_extern( immutable, parallel_safe, name = "mod", schema = "toolkit_experimental" )] pub fn pipeline_mod(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Mod, rhs }.flatten() } #[pg_extern( immutable, parallel_safe, name = "power", schema = "toolkit_experimental" )] pub fn pipeline_power(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Power, rhs, } .flatten() } // log(double) already exists as the log base 10 so we need a new name #[pg_extern( immutable, parallel_safe, name = "logn", schema = "toolkit_experimental" )]
// pipeline_log_n body (its #[pg_extern] attribute closes the previous line),
// followed by the unary pipeline-element constructors
// abs/cbrt/ceil/floor/ln/log10/round/sign — each builds a one-element
// Arithmetic pipeline with rhs fixed at 0.0, which unary application ignores.
pub fn pipeline_log_n(rhs: f64) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: LogN, rhs, } .flatten() } // // unary operations // #[pg_extern( immutable, parallel_safe, name = "abs", schema = "toolkit_experimental" )] pub fn pipeline_abs() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Abs, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "cbrt", schema = "toolkit_experimental" )] pub fn pipeline_cbrt() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Cbrt, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "ceil", schema = "toolkit_experimental" )] pub fn pipeline_ceil() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Ceil, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "floor", schema = "toolkit_experimental" )] pub fn pipeline_floor() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Floor, rhs: 0.0, } .flatten() } #[pg_extern(immutable, parallel_safe, name = "ln", schema = "toolkit_experimental")] pub fn pipeline_ln() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Ln, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "log10", schema = "toolkit_experimental" )] pub fn pipeline_log10() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Log10, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "round", schema = "toolkit_experimental" )] pub fn pipeline_round() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Round, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "sign", schema = "toolkit_experimental" )] pub fn pipeline_sign() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Sign, rhs: 0.0, } .flatten() }
// pipeline_sqrt / pipeline_trunc: the last two unary pipeline-element
// constructors (rhs unused, fixed at 0.0), followed by the arithmetic test
// module. test_simple_arith_binops applies add/sub/mul/div/mod/power/logn to
// a fixed, deliberately unsorted 5-point series and asserts the serialized
// text of each result.
#[pg_extern( immutable, parallel_safe, name = "sqrt", schema = "toolkit_experimental" )] pub fn pipeline_sqrt() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Sqrt, rhs: 0.0, } .flatten() } #[pg_extern( immutable, parallel_safe, name = "trunc", schema = "toolkit_experimental" )] pub fn pipeline_trunc() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Arithmetic { function: Trunc, rhs: 0.0, } .flatten() } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_simple_arith_binops() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> add(1.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:26),\ (ts:\"2020-01-01 00:00:00+00\",val:11),\ (ts:\"2020-01-03 00:00:00+00\",val:21),\ (ts:\"2020-01-02 00:00:00+00\",val:16),\ (ts:\"2020-01-05 00:00:00+00\",val:31)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> sub(3.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::()
// Continued binop assertions: sub(3.0), mul(2.0), div(5.0), mod(5.0) (all
// input values are multiples of 5, so every remainder is 0), then power(2.0).
.unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:22),\ (ts:\"2020-01-01 00:00:00+00\",val:7),\ (ts:\"2020-01-03 00:00:00+00\",val:17),\ (ts:\"2020-01-02 00:00:00+00\",val:12),\ (ts:\"2020-01-05 00:00:00+00\",val:27)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> mul(2.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:50),\ (ts:\"2020-01-01 00:00:00+00\",val:20),\ (ts:\"2020-01-03 00:00:00+00\",val:40),\ (ts:\"2020-01-02 00:00:00+00\",val:30),\ (ts:\"2020-01-05 00:00:00+00\",val:60)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> div(5.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:5),\ (ts:\"2020-01-01 00:00:00+00\",val:2),\ (ts:\"2020-01-03 00:00:00+00\",val:4),\ (ts:\"2020-01-02 00:00:00+00\",val:3),\ (ts:\"2020-01-05 00:00:00+00\",val:6)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> mod(5.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:0),\ (ts:\"2020-01-01 00:00:00+00\",val:0),\ (ts:\"2020-01-03 00:00:00+00\",val:0),\ (ts:\"2020-01-02 00:00:00+00\",val:0),\ (ts:\"2020-01-05 00:00:00+00\",val:0)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> power(2.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\
// power(2.0) and logn(10.0) expectations; test_simple_arith_unaryops begins
// at the end of this line: same search-path trick over a series containing
// negative and fractional values to exercise the unary transforms.
(ts:\"2020-01-04 00:00:00+00\",val:625),\ (ts:\"2020-01-01 00:00:00+00\",val:100),\ (ts:\"2020-01-03 00:00:00+00\",val:400),\ (ts:\"2020-01-02 00:00:00+00\",val:225),\ (ts:\"2020-01-05 00:00:00+00\",val:900)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> logn(10.0))::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:1.3979400086720375),\ (ts:\"2020-01-01 00:00:00+00\",val:1),\ (ts:\"2020-01-03 00:00:00+00\",val:1.301029995663981),\ (ts:\"2020-01-02 00:00:00+00\",val:1.1760912590556811),\ (ts:\"2020-01-05 00:00:00+00\",val:1.4771212547196624)\ ],null_val:[0])" ); }); } #[pg_test] fn test_simple_arith_unaryops() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.5), \ ('2020-01-01 UTC'::TIMESTAMPTZ, -10.1), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.2), \ ('2020-01-02 UTC'::TIMESTAMPTZ, -15.6), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.3)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> abs())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25.5),\ (ts:\"2020-01-01 00:00:00+00\",val:10.1),\ (ts:\"2020-01-03 00:00:00+00\",val:20.2),\ (ts:\"2020-01-02
// abs() expectation tail; the cbrt() assertion is commented out pending
// stabilization; ceil() and floor() expectations; the inline TODO/Josh note
// concerns NaN results (ln/log10/sqrt of negatives) serializing as null.
00:00:00+00\",val:15.6),\ (ts:\"2020-01-05 00:00:00+00\",val:30.3)\ ],null_val:[0])" ); // TODO re-enable once made stable // let val = client.update( // &format!("SELECT (series -> cbrt())::TEXT FROM ({}) s", create_series), // None, // None // ) // .first() // .get_one::().unwrap(); // assert_eq!(val.unwrap(), "[\ // (ts:\"2020-01-04 00:00:00+00\",val:2.943382658441668),\ // (ts:\"2020-01-01 00:00:00+00\",val:-2.161592332945083),\ // (ts:\"2020-01-03 00:00:00+00\",val:2.7234356815688767),\ // (ts:\"2020-01-02 00:00:00+00\",val:-2.4986659549227817),\ // (ts:\"2020-01-05 00:00:00+00\",val:3.117555613369834)\ // ]"); let val = client .update( &format!("SELECT (series -> ceil())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:26),\ (ts:\"2020-01-01 00:00:00+00\",val:-10),\ (ts:\"2020-01-03 00:00:00+00\",val:21),\ (ts:\"2020-01-02 00:00:00+00\",val:-15),\ (ts:\"2020-01-05 00:00:00+00\",val:31)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> floor())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:-11),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:-16),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); // TODO why are there `null`s here? // Josh - likely JSON can't represent nans correctly...
// Disabled (commented) ln/log10 assertions — NaNs would serialize as null;
// round() and sign() expectations (round is half-away-from-zero here:
// -15.6 -> -16, matching f64::round).
// TODO re-enable once made stable // let val = client.update( // &format!("SELECT (series -> ln())::TEXT FROM ({}) s", create_series), // None, // None // ) // .first() // .get_one::().unwrap(); // assert_eq!(val.unwrap(), "[\ // (ts:\"2020-01-04 00:00:00+00\",val:3.2386784521643803),\ // (ts:\"2020-01-01 00:00:00+00\",val:null),\ // (ts:\"2020-01-03 00:00:00+00\",val:3.005682604407159),\ // (ts:\"2020-01-02 00:00:00+00\",val:null),\ // (ts:\"2020-01-05 00:00:00+00\",val:3.4111477125153233)\ // ]"); // TODO re-enable once made stable // let val = client.update( // &format!("SELECT (series -> log10())::TEXT FROM ({}) s", create_series), // None, // None // ) // .first() // .get_one::().unwrap(); // assert_eq!(val.unwrap(), "[\ // (ts:\"2020-01-04 00:00:00+00\",val:1.4065401804339552),\ // (ts:\"2020-01-01 00:00:00+00\",val:null),\ // (ts:\"2020-01-03 00:00:00+00\",val:1.3053513694466237),\ // (ts:\"2020-01-02 00:00:00+00\",val:null),\ // (ts:\"2020-01-05 00:00:00+00\",val:1.481442628502305)\ // ]"); let val = client .update( &format!("SELECT (series -> round())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:26),\ (ts:\"2020-01-01 00:00:00+00\",val:-10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:-16),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); let val = client .update( &format!("SELECT (series -> sign())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:1),\ (ts:\"2020-01-01 00:00:00+00\",val:-1),\ (ts:\"2020-01-03 00:00:00+00\",val:1),\ (ts:\"2020-01-02 00:00:00+00\",val:-1),\ (ts:\"2020-01-05 00:00:00+00\",val:1)\ ],null_val:[0])" ); // TODO re-enable once made stable // let
// Disabled sqrt() assertion tail, then the trunc() expectation (truncation
// toward zero: -15.6 -> -15) — end of arithmetic.rs. The FILE banner below is
// an extraction artifact: delta.rs begins with its imports and a TODO about
// whether (immutable, parallel_safe) is correct for the element constructor.
val = client.update( // &format!("SELECT (series -> sqrt())::TEXT FROM ({}) s", create_series), // None, // None // ) // .first() // .get_one::().unwrap(); // assert_eq!(val.unwrap(), "[\ // (ts:\"2020-01-04 00:00:00+00\",val:5.049752469181039),\ // (ts:\"2020-01-01 00:00:00+00\",val:null),\ // (ts:\"2020-01-03 00:00:00+00\",val:4.494441010848846),\ // (ts:\"2020-01-02 00:00:00+00\",val:null),\ // (ts:\"2020-01-05 00:00:00+00\",val:5.504543577809154)\ // ]"); let val = client .update( &format!("SELECT (series -> trunc())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:-10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:-15),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); }); } } ================================================ FILE: extension/src/time_vector/pipeline/delta.rs ================================================ use pgrx::*; use super::*; use crate::accessors::AccessorDelta; // TODO is (immutable, parallel_safe) correct?
// delta_pipeline_element: exposed as `delta_cast`; the AccessorDelta argument
// is only a marker (discarded via `let _`) used to drive the implicit CAST
// declared in the extension_sql below, producing a one-element Delta
// pipeline. timevector_delta: panics on unsorted or null-containing input,
// then emits pairwise differences — n-1 points, each timestamped with the
// right-hand point's ts; the null bitmap is sized with div_ceil(8) and
// zero-filled. NOTE(review): `it.next().unwrap()` looks like it would panic
// on an empty timevector — TODO confirm whether empty input is excluded
// upstream. The test module and test_pipeline_delta begin at the end of this
// line.
#[pg_extern( immutable, parallel_safe, name = "delta_cast", schema = "toolkit_experimental" )] pub fn delta_pipeline_element( accessor: AccessorDelta, ) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { let _ = accessor; Element::Delta {}.flatten() } extension_sql!( r#" CREATE CAST (AccessorDelta AS toolkit_experimental.UnstableTimevectorPipeline) WITH FUNCTION toolkit_experimental.delta_cast AS IMPLICIT; "#, name = "accessor_delta_cast", requires = [delta_pipeline_element] ); pub fn timevector_delta<'s>(series: &Timevector_TSTZ_F64<'s>) -> Timevector_TSTZ_F64<'s> { if !series.is_sorted() { panic!("can only compute deltas for sorted timevector"); } if series.has_nulls() { panic!("Unable to compute deltas over timevector containing nulls"); } let mut it = series.iter(); let mut prev = it.next().unwrap().val; let mut delta_points = Vec::new(); for pt in it { delta_points.push(TSPoint { ts: pt.ts, val: pt.val - prev, }); prev = pt.val; } let nulls_len = delta_points.len().div_ceil(8); build!(Timevector_TSTZ_F64 { num_points: delta_points.len() as u32, flags: series.flags, internal_padding: [0; 3], points: delta_points.into(), null_val: std::vec::from_elem(0_u8, nulls_len).into(), }) } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_pipeline_delta() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise.
// test_pipeline_delta body: inserts 9 ordered rows into a table and asserts
// the serialized result of `timevector(time, value) -> delta()` — 8 points,
// flags:1 (sorted), including expected f64 rounding artifacts
// (0.09999999999999787, -458.09999999999997). The FILE banner below is an
// extraction artifact marking the start of expansion.rs, whose imports and
// toolkit_experimental pg_type! declarations begin at the end of this line.
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-04 UTC'::TIMESTAMPTZ, 92.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.8), \ ('2020-01-06 UTC'::TIMESTAMPTZ, 30.8), \ ('2020-01-07 UTC'::TIMESTAMPTZ, 30.8), \ ('2020-01-08 UTC'::TIMESTAMPTZ, 30.9), \ ('2020-01-09 UTC'::TIMESTAMPTZ, -427.2)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value) -> delta())::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:8,flags:1,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-03 00:00:00+00\",val:-5),\ (ts:\"2020-01-04 00:00:00+00\",val:72),\ (ts:\"2020-01-05 00:00:00+00\",val:-61.2),\ (ts:\"2020-01-06 00:00:00+00\",val:0),\ (ts:\"2020-01-07 00:00:00+00\",val:0),\ (ts:\"2020-01-08 00:00:00+00\",val:0.09999999999999787),\ (ts:\"2020-01-09 00:00:00+00\",val:-458.09999999999997)\ ],null_val:[0])" ); }); } } ================================================ FILE: extension/src/time_vector/pipeline/expansion.rs ================================================ use std::mem::take; use pgrx::{iter::TableIterator, *}; use super::*; use crate::{build, pg_type, ron_inout_funcs}; use self::toolkit_experimental::{ PipelineForceMaterialize, PipelineForceMaterializeData, PipelineThenUnnest, PipelineThenUnnestData, }; #[pg_schema] pub mod toolkit_experimental { pub(crate) use super::*; pg_type!
// pg_type! declarations for PipelineThenUnnest and PipelineForceMaterialize
// (each: element count + flat-serialized element list) with RON in/out funcs.
// pipeline_unnest builds an empty unnest finalizer; arrow_finalize_with_unnest
// appends a pipeline's elements into the finalizer, flattening directly when
// the finalizer is empty to avoid a temporary element allocation;
// arrow_run_pipeline_then_unnest executes the pipeline and unnests the
// resulting series into (time, value) rows. pipeline_series (the
// `materialize` finalizer constructor) begins at the end of this line.
{ #[derive(Debug)] struct PipelineThenUnnest<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineThenUnnest<'input>); pg_type! { #[derive(Debug)] struct PipelineForceMaterialize<'input> { num_elements: u64, elements: [Element<'input>; self.num_elements], } } ron_inout_funcs!(PipelineForceMaterialize<'input>); } #[pg_extern( immutable, parallel_safe, name = "unnest", schema = "toolkit_experimental" )] pub fn pipeline_unnest() -> toolkit_experimental::PipelineThenUnnest<'static> { build! { PipelineThenUnnest { num_elements: 0, elements: vec![].into(), } } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_finalize_with_unnest<'p>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'p>, then_stats_agg: toolkit_experimental::PipelineThenUnnest<'p>, ) -> toolkit_experimental::PipelineThenUnnest<'p> { if then_stats_agg.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! { PipelineThenUnnest { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_stats_agg.elements.iter()); build! { PipelineThenUnnest { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_run_pipeline_then_unnest<'a>( timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineThenUnnest<'a>, ) -> TableIterator<'static, (name!(time, crate::raw::TimestampTz), name!(value, f64))> { let series = run_pipeline_elements(timevector, pipeline.elements.iter()) .0 .into_owned(); crate::time_vector::unnest(series.into()) } #[pg_extern( immutable, parallel_safe, name = "materialize", schema = "toolkit_experimental" )] pub fn pipeline_series() -> toolkit_experimental::PipelineForceMaterialize<'static> { build!
// arrow_force_materialize mirrors arrow_finalize_with_unnest for the
// materialize finalizer; arrow_run_pipeline_then_materialize runs the
// elements and copies the result into the current memory context.
// pipeline_materialize_support is the planner support function that folds a
// preceding pipeline into the materialize call at plan time; the
// extension_sql attaches it via ALTER FUNCTION ... SUPPORT. The test module
// and test_unnest_finalizer begin at the end of this line.
{ PipelineForceMaterialize { num_elements: 0, elements: vec![].into(), } } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_force_materialize<'e>( mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'e>, then_stats_agg: toolkit_experimental::PipelineForceMaterialize<'e>, ) -> toolkit_experimental::PipelineForceMaterialize<'e> { if then_stats_agg.num_elements == 0 { // flatten immediately so we don't need a temporary allocation for elements return unsafe { flatten! { PipelineForceMaterialize { num_elements: pipeline.0.num_elements, elements: pipeline.0.elements, } } }; } let mut elements = take(pipeline.elements.as_owned()); elements.extend(then_stats_agg.elements.iter()); build! { PipelineForceMaterialize { num_elements: elements.len().try_into().unwrap(), elements: elements.into(), } } } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_run_pipeline_then_materialize<'a>( timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::PipelineForceMaterialize<'a>, ) -> toolkit_experimental::Timevector_TSTZ_F64<'static> { run_pipeline_elements(timevector, pipeline.elements.iter()).in_current_context() } #[pg_extern(immutable, parallel_safe, schema = "toolkit_experimental")] pub unsafe fn pipeline_materialize_support(input: pgrx::Internal) -> pgrx::Internal { pipeline_support_helper(input, |old_pipeline, new_element| { let new_element = PipelineForceMaterialize::from_polymorphic_datum( new_element, false, pg_sys::Oid::INVALID, ) .unwrap(); arrow_force_materialize(old_pipeline, new_element) .into_datum() .unwrap() }) } extension_sql!( r#" ALTER FUNCTION "arrow_run_pipeline_then_materialize" SUPPORT toolkit_experimental.pipeline_materialize_support; "#, name = "pipe_then_materialize", requires = [pipeline_materialize_support], ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_unnest_finalizer() { Spi::connect_mut(|client| {
client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)) as v(time, value)"; let val = client .update( &format!( "SELECT array_agg(val)::TEXT \ FROM (SELECT series -> unnest() as val FROM ({create_series}) s) t" ), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), "{\"(\\\"2020-01-04 00:00:00+00\\\",25)\",\"(\\\"2020-01-01 00:00:00+00\\\",10)\",\"(\\\"2020-01-03 00:00:00+00\\\",20)\",\"(\\\"2020-01-02 00:00:00+00\\\",15)\",\"(\\\"2020-01-05 00:00:00+00\\\",30)\"}"); }); } #[pg_test] fn test_series_finalizer() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); // we use a subselect to guarantee order let create_series = "SELECT timevector(time, value) as series FROM \ (VALUES ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 11.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 21.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 31.0)) as v(time, value)"; let val = client .update( &format!("SELECT (series -> materialize())::TEXT FROM ({create_series}) s"), None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:11),\ (ts:\"2020-01-03 00:00:00+00\",val:21),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:31)\ ],null_val:[0])" ); }); } #[pg_test] fn test_force_materialize() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
            let sp = client
                .update(
                    "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            client
                .update(&format!("SET LOCAL search_path TO {sp}"), None, &[])
                .unwrap();

            // `-> materialize()` should force materialization, but otherwise the
            // pipeline-folding optimization should proceed
            let output = client
                .update(
                    "EXPLAIN (verbose) SELECT \
                    timevector('2021-01-01'::timestamptz, 0.1) \
                    -> round() -> abs() \
                    -> materialize() \
                    -> abs() -> round();",
                    None,
                    &[],
                )
                .unwrap()
                // the second row of EXPLAIN VERBOSE carries the `Output:` line
                .nth(1)
                .unwrap()
                .get_datum_by_ordinal(1)
                .unwrap()
                .value::<String>()
                .unwrap()
                .unwrap();
            assert_eq!(output.trim(), "Output: \
                arrow_run_pipeline(\
                arrow_run_pipeline_then_materialize(\
                timevector('2021-01-01 00:00:00+00'::timestamp with time zone, '0.1'::double precision), \
                '(version:1,num_elements:2,elements:[\
                Arithmetic(function:Round,rhs:0),Arithmetic(function:Abs,rhs:0)\
                ])'::pipelineforcematerialize\
                ), \
                '(version:1,num_elements:2,elements:[\
                Arithmetic(function:Abs,rhs:0),Arithmetic(function:Round,rhs:0)\
                ])'::unstabletimevectorpipeline\
                )");
        });
    }
}


================================================
FILE: extension/src/time_vector/pipeline/fill_to.rs
================================================
use pgrx::*;

use flat_serialize_macro::FlatSerializable;

use serde::{Deserialize, Serialize};

use super::*;

// TODO: there are one or two other gapfill objects in this extension, these should be unified

/// Strategy used by the `fill_to` pipeline element to synthesize points
/// inside a gap between two existing points.
#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug, FlatSerializable)]
#[repr(u64)]
pub enum FillToMethod {
    // carry the left point's value forward
    Locf,
    // linear interpolation between the two surrounding points
    Interpolate,
    // copy the value of whichever surrounding point is closer in time
    Nearest,
}

impl FillToMethod {
    /// Create the gap-filling point at `target_ts`, which must lie between
    /// `lhs.ts` and `rhs.ts` (the two real points bracketing the gap).
    pub fn fill_point(&self, lhs: &TSPoint, rhs: &TSPoint, target_ts: i64) -> TSPoint {
        match *self {
            FillToMethod::Locf => TSPoint {
                ts: target_ts,
                val: lhs.val,
            },
            FillToMethod::Interpolate => {
                // weights are each point's share of the gap; they sum to 1
                let interval = rhs.ts as f64 - lhs.ts as f64;
                let left_wt = 1. - (target_ts - lhs.ts) as f64 / interval;
                let right_wt = 1. - (rhs.ts - target_ts) as f64 / interval;
                TSPoint {
                    ts: target_ts,
                    val: lhs.val * left_wt + rhs.val * right_wt,
                }
            }
            FillToMethod::Nearest => {
                // ties (exact midpoint) go to the left point
                if rhs.ts - target_ts >= target_ts - lhs.ts {
                    TSPoint {
                        ts: target_ts,
                        val: lhs.val,
                    }
                } else {
                    TSPoint {
                        ts: target_ts,
                        val: rhs.val,
                    }
                }
            }
        }
    }
}

// TODO is (immutable, parallel_safe) correct?
/// SQL constructor for the `fill_to` pipeline element: parses the fill
/// method name and flattens an `Element::FillTo` carrying the gap interval
/// in microseconds.
#[pg_extern(
    immutable,
    parallel_safe,
    name = "fill_to",
    schema = "toolkit_experimental"
)]
pub fn fillto_pipeline_element(
    interval: crate::raw::Interval,
    fill_method: String,
) -> toolkit_experimental::UnstableTimevectorPipeline<'static> {
    unsafe {
        let interval = interval.0.cast_mut_ptr::<pg_sys::Interval>() as *const pg_sys::Interval;
        // TODO: store the postgres interval object and use postgres timestamp/interval functions
        // NOTE: months are approximated as 30 days here, so month-bearing
        // intervals do not follow calendar arithmetic.
        let interval = ((*interval).month as i64 * 30 + (*interval).day as i64)
            * 24
            * 60
            * 60
            * 1000000
            + (*interval).time;

        // "linear" is accepted as an alias for "interpolate"
        let fill_method = match fill_method.to_lowercase().as_str() {
            "locf" => FillToMethod::Locf,
            "interpolate" => FillToMethod::Interpolate,
            "linear" => FillToMethod::Interpolate,
            "nearest" => FillToMethod::Nearest,
            _ => panic!("Invalid fill method"),
        };

        Element::FillTo {
            interval,
            fill_method,
        }
        .flatten()
    }
}

/// Execute a `FillTo` element: for every gap strictly wider than `interval`,
/// insert synthetic points every `interval` microseconds until the next real
/// point. Requires a sorted, NULL-free timevector; panics otherwise.
pub fn fill_to<'s>(
    series: Timevector_TSTZ_F64<'s>,
    element: &toolkit_experimental::Element,
) -> Timevector_TSTZ_F64<'s> {
    let (interval, method) = match element {
        Element::FillTo {
            interval,
            fill_method,
        } => (*interval, fill_method),
        _ => unreachable!(),
    };

    if !series.is_sorted() {
        panic!("Timevector must be sorted prior to passing to fill_to")
    }
    if series.has_nulls() {
        // TODO: This should be supportable outside of FillMode::Interpolate
        panic!("Fill_to requires a timevector to not have NULL values")
    }

    // collect the synthetic points for every over-wide gap
    let mut result = vec![];
    let mut it = series.iter().peekable();
    let mut current = it.next();
    while let (Some(lhs), Some(rhs)) = (current, it.peek()) {
        if rhs.ts - lhs.ts > interval {
            let mut target = lhs.ts + interval;
            // strictly less-than: never duplicate the right endpoint
            while target < rhs.ts {
                result.push(method.fill_point(&lhs, rhs, target));
                target += interval;
            }
        }
        current = it.next();
    }

    // no gaps needed filling; return the input unchanged
    if result.is_empty() {
        return series;
    }

    // merge original and synthetic points back into timestamp order
    let mut result: Vec<_> = series.iter().chain(result.into_iter()).collect();
    result.sort_by_key(|p| p.ts);
    // one null-bitmap byte per 8 points; all zero since nulls are rejected above
    let nulls_len = result.len().div_ceil(8);

    build! {
        Timevector_TSTZ_F64 {
            num_points: result.len() as _,
            flags: series.flags,
            internal_padding: [0; 3],
            points: result.into(),
            null_val: std::vec::from_elem(0_u8, nulls_len).into(),
        }
    }
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx::*;
    use pgrx_macros::pg_test;

    // End-to-end check of every fill method over the same gappy series,
    // pinned to the exact serialized timevector text.
    #[pg_test]
    fn test_pipeline_fill_to() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            // using the search path trick for this test b/c the operator is
            // difficult to spot otherwise.
            let sp = client
                .update(
                    "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            client
                .update(&format!("SET LOCAL search_path TO {sp}"), None, &[])
                .unwrap();

            client
                .update(
                    "CREATE TABLE series(time timestamptz, value double precision)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    "INSERT INTO series \
                    VALUES \
                    ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \
                    ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \
                    ('2020-01-04 UTC'::TIMESTAMPTZ, 90.0), \
                    ('2020-01-06 UTC'::TIMESTAMPTZ, 30), \
                    ('2020-01-09 UTC'::TIMESTAMPTZ, 40.0)",
                    None,
                    &[],
                )
                .unwrap();

            let val = client.update(
                "SELECT (timevector(time, value) -> fill_to('24 hours', 'locf'))::TEXT FROM series",
                None, &[]
            )
            .unwrap().first()
            .get_one::<String>().unwrap();
            assert_eq!(
                val.unwrap(),
                "(version:1,num_points:9,flags:1,internal_padding:(0,0,0),points:[\
                (ts:\"2020-01-01 00:00:00+00\",val:10),\
                (ts:\"2020-01-02 00:00:00+00\",val:10),\
                (ts:\"2020-01-03 00:00:00+00\",val:20),\
                (ts:\"2020-01-04 00:00:00+00\",val:90),\
                (ts:\"2020-01-05 00:00:00+00\",val:90),\
                (ts:\"2020-01-06 00:00:00+00\",val:30),\
                (ts:\"2020-01-07 00:00:00+00\",val:30),\
                (ts:\"2020-01-08 00:00:00+00\",val:30),\
                (ts:\"2020-01-09 00:00:00+00\",val:40)\
                ],null_val:[0,0])"
            );

            let val = client.update(
                "SELECT (timevector(time, value) -> fill_to('24 hours', 'linear'))::TEXT FROM series",
                None, &[]
            )
            .unwrap().first()
            .get_one::<String>().unwrap();
            assert_eq!(
                val.unwrap(),
                "(version:1,num_points:9,flags:1,internal_padding:(0,0,0),points:[\
                (ts:\"2020-01-01 00:00:00+00\",val:10),\
                (ts:\"2020-01-02 00:00:00+00\",val:15),\
                (ts:\"2020-01-03 00:00:00+00\",val:20),\
                (ts:\"2020-01-04 00:00:00+00\",val:90),\
                (ts:\"2020-01-05 00:00:00+00\",val:60),\
                (ts:\"2020-01-06 00:00:00+00\",val:30),\
                (ts:\"2020-01-07 00:00:00+00\",val:33.33333333333334),\
                (ts:\"2020-01-08 00:00:00+00\",val:36.66666666666667),\
                (ts:\"2020-01-09 00:00:00+00\",val:40)\
                ],null_val:[0,0])"
            );

            let val = client.update(
                "SELECT (timevector(time, value) -> fill_to('24 hours', 'nearest'))::TEXT FROM series",
                None, &[]
            )
            .unwrap().first()
            .get_one::<String>().unwrap();
            assert_eq!(
                val.unwrap(),
                "(version:1,num_points:9,flags:1,internal_padding:(0,0,0),points:[\
                (ts:\"2020-01-01 00:00:00+00\",val:10),\
                (ts:\"2020-01-02 00:00:00+00\",val:10),\
                (ts:\"2020-01-03 00:00:00+00\",val:20),\
                (ts:\"2020-01-04 00:00:00+00\",val:90),\
                (ts:\"2020-01-05 00:00:00+00\",val:90),\
                (ts:\"2020-01-06 00:00:00+00\",val:30),\
                (ts:\"2020-01-07 00:00:00+00\",val:30),\
                (ts:\"2020-01-08 00:00:00+00\",val:40),\
                (ts:\"2020-01-09 00:00:00+00\",val:40)\
                ],null_val:[0,0])"
            );

            // non-divisor interval: fill points restart from each real point
            let val = client.update(
                "SELECT (timevector(time, value) -> fill_to('10 hours', 'nearest'))::TEXT FROM series",
                None, &[]
            )
            .unwrap().first()
            .get_one::<String>().unwrap();
            assert_eq!(
                val.unwrap(),
                "(version:1,num_points:22,flags:1,internal_padding:(0,0,0),points:[\
                (ts:\"2020-01-01 00:00:00+00\",val:10),\
                (ts:\"2020-01-01 10:00:00+00\",val:10),\
                (ts:\"2020-01-01 20:00:00+00\",val:10),\
                (ts:\"2020-01-02 06:00:00+00\",val:20),\
                (ts:\"2020-01-02 16:00:00+00\",val:20),\
                (ts:\"2020-01-03 00:00:00+00\",val:20),\
                (ts:\"2020-01-03 10:00:00+00\",val:20),\
                (ts:\"2020-01-03 20:00:00+00\",val:90),\
                (ts:\"2020-01-04 00:00:00+00\",val:90),\
                (ts:\"2020-01-04 10:00:00+00\",val:90),\
                (ts:\"2020-01-04 20:00:00+00\",val:90),\
                (ts:\"2020-01-05 06:00:00+00\",val:30),\
                (ts:\"2020-01-05 16:00:00+00\",val:30),\
                (ts:\"2020-01-06 00:00:00+00\",val:30),\
                (ts:\"2020-01-06 10:00:00+00\",val:30),\
                (ts:\"2020-01-06 20:00:00+00\",val:30),\
                (ts:\"2020-01-07 06:00:00+00\",val:30),\
                (ts:\"2020-01-07 16:00:00+00\",val:40),\
                (ts:\"2020-01-08 02:00:00+00\",val:40),\
                (ts:\"2020-01-08 12:00:00+00\",val:40),\
                (ts:\"2020-01-08 22:00:00+00\",val:40),\
                (ts:\"2020-01-09 00:00:00+00\",val:40)\
                ],null_val:[0,0,0])"
            );
        });
    }
}


================================================
FILE: extension/src/time_vector/pipeline/filter.rs
================================================
use pgrx::*;

use super::*;

// TODO is (stable, parallel_safe) correct?
/// SQL constructor for the `filter` pipeline element: validates that the
/// lambda returns BOOLEAN, then flattens an `Element::FilterLambda`.
#[pg_extern(
    immutable,
    parallel_safe,
    name = "filter",
    schema = "toolkit_experimental"
)]
pub fn filter_lambda_pipeline_element<'l>(
    lambda: toolkit_experimental::Lambda<'l>,
) -> toolkit_experimental::UnstableTimevectorPipeline<'static> {
    let expression = lambda.parse();
    if expression.ty() != &lambda::Type::Bool {
        panic!("invalid lambda type: the lambda must return a BOOLEAN")
    }

    Element::FilterLambda {
        lambda: lambda.into_data(),
    }
    .flatten()
}

/// Execute a filter lambda over a timevector, keeping only the points for
/// which the lambda evaluates to true. Panics if the lambda is not BOOLEAN.
pub fn apply_lambda_to<'a>(
    mut series: Timevector_TSTZ_F64<'a>,
    lambda: &lambda::LambdaData<'_>,
) -> Timevector_TSTZ_F64<'a> {
    let expression = lambda.parse();
    if expression.ty() != &lambda::Type::Bool {
        panic!("invalid lambda type: the lambda must return a BOOLEAN")
    }

    let mut executor = lambda::ExpressionExecutor::new(&expression);

    let invoke = |time: i64, value: f64| {
        use lambda::Value::*;
        // reset cached `let` variables before each point's evaluation
        executor.reset();
        let result = executor.exec(value, time);
        match result {
            Bool(b) => b,
            // return type was checked above, so only Bool is possible
            _ => unreachable!(),
        }
    };

    filter_lambda_over_series(&mut series, invoke);
    series
}

/// In-place retain over the points vector, keeping `num_points` consistent.
pub fn filter_lambda_over_series(
    series: &mut Timevector_TSTZ_F64<'_>,
    mut func: impl FnMut(i64, f64) -> bool,
) {
    series.points.as_owned().retain(|p| func(p.ts, p.val));
    series.num_points = series.points.len()
        as _;
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx::*;
    use pgrx_macros::pg_test;

    // End-to-end check that `-> filter(lambda)` keeps exactly the points the
    // lambda accepts, preserving the original point order.
    #[pg_test]
    fn test_pipeline_filter_lambda() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            // using the search path trick for this test b/c the operator is
            // difficult to spot otherwise.
            let sp = client
                .update(
                    "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            client
                .update(&format!("SET LOCAL search_path TO {sp}"), None, &[])
                .unwrap();

            client
                .update(
                    "CREATE TABLE series(time timestamptz, value double precision)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    "INSERT INTO series \
                    VALUES \
                    ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \
                    ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \
                    ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \
                    ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \
                    ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)",
                    None,
                    &[],
                )
                .unwrap();

            // baseline: unfiltered timevector, in insertion order
            let val = client
                .update(
                    "SELECT (timevector(time, value))::TEXT FROM series",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap();
            assert_eq!(
                val.unwrap(),
                "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\
                (ts:\"2020-01-04 00:00:00+00\",val:25),\
                (ts:\"2020-01-01 00:00:00+00\",val:10),\
                (ts:\"2020-01-03 00:00:00+00\",val:20),\
                (ts:\"2020-01-02 00:00:00+00\",val:15),\
                (ts:\"2020-01-05 00:00:00+00\",val:30)\
                ],null_val:[0])"
            );

            let val = client.update(
                "SELECT (timevector(time, value) -> filter($$ $time != '2020-01-05't and ($value = 10 or $value = 20) $$))::TEXT FROM series",
                None, &[]
            )
            .unwrap().first()
            .get_one::<String>().unwrap();
            assert_eq!(
                val.unwrap(),
                "(version:1,num_points:2,flags:0,internal_padding:(0,0,0),points:[\
                (ts:\"2020-01-01 00:00:00+00\",val:10),\
                (ts:\"2020-01-03 00:00:00+00\",val:20)\
                ],null_val:[0])"
            );
        });
    }
}


================================================
FILE: extension/src/time_vector/pipeline/lambda/executor.rs
================================================
use pgrx::*;

use super::*;

/// Tree-walking interpreter for a parsed lambda `Expression`. The tracer `T`
/// observes every sub-expression result (used for testing/debugging); `()` is
/// the no-op tracer.
pub struct ExpressionExecutor<'e, T> {
    exprs: &'e Expression,
    // memoized values for `let`-bound user variables, one slot per variable;
    // `None` until first use within the current evaluation
    var_vals: Vec<Option<Value>>,
    tracer: T,
}

impl<'e> ExpressionExecutor<'e, ()> {
    /// Executor with no tracing.
    pub fn new(exprs: &'e Expression) -> Self {
        Self::with_tracer(exprs, ())
    }
}

impl<'e, T> ExpressionExecutor<'e, T>
where
    T: Tracer,
{
    /// Convenience constructor taking a closure tracer.
    pub fn with_fn_tracer(exprs: &'e Expression, tracer: T) -> Self
    where
        T: FnMut(&ExpressionSegment, &Value),
    {
        Self::with_tracer(exprs, tracer)
    }

    pub fn with_tracer(exprs: &'e Expression, tracer: T) -> Self {
        Self {
            var_vals: vec![None; exprs.variables.len()],
            exprs,
            tracer,
        }
    }

    /// Clear memoized variable values; call between evaluations on new input.
    pub fn reset(&mut self) {
        for v in &mut self.var_vals {
            *v = None
        }
    }

    /// Evaluate the whole expression against one ($value, $time) pair.
    pub fn exec(&mut self, value: f64, time: i64) -> Value {
        self.exec_expression(&self.exprs.expr, value, time)
    }

    fn exec_expression(
        &mut self,
        expr: &ExpressionSegment,
        value: f64,
        time: i64,
        // trace_function: impl FnMut(&ExpressionSegment, &Value),
    ) -> Value {
        use ExpressionSegment::*;
        let res = match expr {
            ValueVar => Value::Double(value),
            TimeVar => Value::Time(time),
            DoubleConstant(f) => Value::Double(*f),
            TimeConstant(t) => Value::Time(*t),
            IntervalConstant(i) => Value::Interval(*i),
            UserVar(i, _) => self.force_var(*i, value, time),
            FunctionCall(function, args) => self.exec_function(function, args, value, time),
            Unary(op, expr, ty) => self.exec_unary_op(*op, ty, expr, value, time),
            Binary(op, left, right, ty) => self.exec_binary_op(*op, ty, left, right, value, time),
            BuildTuple(exprs, _) => Value::Tuple(
                exprs
                    .iter()
                    .map(|e| self.exec_expression(e, value, time))
                    .collect(),
            ),
        };
        // every sub-result is reported to the tracer
        self.tracer.trace(expr, &res);
        res
    }

    // lazily evaluate a `let`-bound variable, memoizing the result
    fn force_var(&mut self, i: usize, value: f64, time: i64) -> Value {
        if let Some(value) = &self.var_vals[i] {
            return value.clone();
        }
        let value = self.exec_expression(&self.exprs.variables[i], value, time);
        self.var_vals[i] = Some(value.clone());
        value
    }

    fn exec_function(
        &mut self,
        function: &Function,
        args: &[ExpressionSegment],
        value: f64,
        time: i64,
    ) -> Value {
        use Function::*;

        // dispatch helpers: all builtin math functions operate on f64
        macro_rules! unary_function {
            ($func:ident ()) => {{
                let then = self.exec_expression(&args[0], value, time).float();
                then.$func().into()
            }};
        }
        macro_rules! binary_function {
            ($func:ident ()) => {{
                let args = &args[0..2];
                let a = self.exec_expression(&args[0], value, time).float();
                let b = self.exec_expression(&args[1], value, time).float();
                a.$func(b).into()
            }};
        }

        match function {
            Abs => unary_function!(abs()),
            Cbrt => unary_function!(cbrt()),
            Ceil => unary_function!(ceil()),
            Floor => unary_function!(floor()),
            Ln => unary_function!(ln()),
            Log10 => unary_function!(log10()),
            Log => {
                // note: base (args[1]) is evaluated before the operand
                let base = self.exec_expression(&args[1], value, time).float();
                let a = self.exec_expression(&args[0], value, time).float();
                a.log(base).into()
            }
            Pi => std::f64::consts::PI.into(),
            Round => unary_function!(round()),
            Sign => unary_function!(signum()),
            Sqrt => unary_function!(sqrt()),
            Trunc => unary_function!(trunc()),
            Acos => unary_function!(acos()),
            Asin => unary_function!(asin()),
            Atan => unary_function!(atan()),
            Atan2 => binary_function!(atan2()),
            Cos => unary_function!(cos()),
            Sin => unary_function!(sin()),
            Tan => unary_function!(tan()),
            Sinh => unary_function!(sinh()),
            Cosh => unary_function!(cosh()),
            Tanh => unary_function!(tanh()),
            Asinh => unary_function!(asinh()),
            Acosh => unary_function!(acosh()),
            Atanh => unary_function!(atanh()),
        }
    }

    fn exec_unary_op(
        &mut self,
        op: UnaryOp,
        ty: &Type,
        expr: &ExpressionSegment,
        value: f64,
        time: i64,
    ) -> Value {
        use Type::*;
        use UnaryOp::*;
        match op {
            Not => {
                let val = self.exec_expression(expr, value, time).bool();
                (!val).into()
            }
            Negative => {
                match ty {
                    Double => {
                        let val = self.exec_expression(expr, value, time).float();
                        (-val).into()
                    }
                    // TODO interval?
                    _ => unreachable!(),
                }
            }
        }
    }

    fn exec_binary_op(
        &mut self,
        op: BinOp,
        ty: &Type,
        left: &ExpressionSegment,
        right: &ExpressionSegment,
        value: f64,
        time: i64,
    ) -> Value {
        use BinOp::*;
        use Type::*;

        // FIXME pgrx wraps all functions in rust wrappers, which makes them
        //       uncallable with DirectFunctionCall(). Is there a way to
        //       export both?
        // TODO This is fixed in a newer pgrx version, should remove after upgrade
        // XXX `NodeTag` somewhere inside `pg_sys::FunctionCallInfo` triggers
        //     `improper_ctypes` lint. The `pgrx` author explains the issue in
        //     details here:
        //
        //         https://github.com/rust-lang/rust/issues/116831
        //
        //     For now it seems OK to suppress these warnings here with
        //     #[allow(improper_ctypes)]
        #[allow(improper_ctypes)]
        unsafe extern "C-unwind" {
            fn interval_pl(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
            fn interval_mi(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
            fn interval_mul(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
            fn interval_div(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
            fn timestamptz_pl_interval(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
            fn timestamptz_mi_interval(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
        }

        // DOUBLE op DOUBLE, computed natively
        macro_rules! float_op {
            (($left: ident, $right: ident) $calc: expr) => {{
                let $left = self.exec_expression(left, value, time).float();
                let $right = self.exec_expression(right, value, time).float();
                ($calc).into()
            }};
        }

        // INTERVAL op INTERVAL, delegated to the postgres C function `$calc`
        macro_rules! interval_op {
            (($left: ident, $right: ident) $calc: ident) => {{
                let left = self.exec_expression(left, value, time).interval();
                let right = self.exec_expression(right, value, time).interval();
                let res: *mut pg_sys::Interval = unsafe {
                    pg_sys::DirectFunctionCall2Coll(
                        Some($calc),
                        pg_sys::InvalidOid,
                        pg_sys::Datum::from(left),
                        pg_sys::Datum::from(right),
                    )
                    .cast_mut_ptr()
                };
                assert!(!res.is_null());
                Value::Interval(res)
            }};
        }

        // INTERVAL op DOUBLE, delegated to the postgres C function `$calc`
        macro_rules! interval_float_op {
            (($left: ident, $right: ident) $calc: ident) => {{
                let left = self.exec_expression(left, value, time).interval();
                let right = self.exec_expression(right, value, time).float();
                let res: *mut pg_sys::Interval = unsafe {
                    pg_sys::DirectFunctionCall2Coll(
                        Some($calc),
                        pg_sys::InvalidOid,
                        pg_sys::Datum::from(left),
                        right.into_datum().unwrap(),
                    )
                    .value() as _
                };
                assert!(!res.is_null());
                Value::Interval(res)
            }};
        }

        // TIMESTAMPTZ op INTERVAL, delegated to the postgres C function `$calc`
        macro_rules! time_op {
            (($left: ident, $right: ident) $calc: ident) => {{
                let left = self.exec_expression(left, value, time).time();
                let right = self.exec_expression(right, value, time).interval();
                let res: i64 = unsafe {
                    pg_sys::DirectFunctionCall2Coll(
                        Some($calc),
                        pg_sys::InvalidOid,
                        pg_sys::Datum::from(left),
                        pg_sys::Datum::from(right),
                    )
                    .value() as _
                };
                Value::Time(res)
            }};
        }

        match op {
            // arithmetic operators; `ty` is the result type the parser chose
            Plus => match ty {
                Double => float_op!((left, right) left + right),
                Time => time_op!((left, right) timestamptz_pl_interval),
                Interval => interval_op!((left, right) interval_pl),
                _ => unreachable!(),
            },
            Minus => match ty {
                Double => float_op!((left, right) left - right),
                Time => time_op!((left, right) timestamptz_mi_interval),
                Interval => interval_op!((left, right) interval_mi),
                _ => unreachable!(),
            },
            Mul => match ty {
                Double => float_op!((left, right) left * right),
                Interval => interval_float_op!((left, right) interval_mul),
                _ => unreachable!(),
            },
            Div => match ty {
                Double => float_op!((left, right) left / right),
                Interval => interval_float_op!((left, right) interval_div),
                _ => unreachable!(),
            },
            Pow => float_op!((left, right) left.powf(right)),

            // comparison operators (operand types already checked by the parser)
            Eq => {
                let left = self.exec_expression(left, value, time);
                let right = self.exec_expression(right, value, time);
                (left == right).into()
            }
            Neq => {
                let left = self.exec_expression(left, value, time);
                let right = self.exec_expression(right, value, time);
                (left != right).into()
            }
            Lt => {
                let left = self.exec_expression(left, value, time);
                let right = self.exec_expression(right, value, time);
                (left < right).into()
            }
            Gt => {
                let left = self.exec_expression(left, value, time);
                let right = self.exec_expression(right, value, time);
                (left > right).into()
            }
            Le => {
                let left = self.exec_expression(left, value, time);
                let right = self.exec_expression(right, value, time);
                (left <= right).into()
            }
            Ge => {
                let left = self.exec_expression(left, value, time);
                let right = self.exec_expression(right, value, time);
                (left >= right).into()
            }

            // boolean operators; short-circuit, so the right side may never
            // be evaluated (and thus never traced)
            And => {
                let left = self.exec_expression(left, value, time).bool();
                if !left {
                    return false.into();
                }
                self.exec_expression(right, value, time)
            }
            Or => {
                let left = self.exec_expression(left, value, time).bool();
                if left {
                    return true.into();
                }
                self.exec_expression(right, value, time)
            }
        }
    }
}

/// Observer invoked with every sub-expression and its result during execution.
pub trait Tracer {
    fn trace(&mut self, expr: &ExpressionSegment, result: &Value);
}

// no-op tracer
impl Tracer for () {
    fn trace(&mut self, _: &ExpressionSegment, _: &Value) {}
}

// any matching closure can be used as a tracer
impl<T> Tracer for T
where
    T: FnMut(&ExpressionSegment, &Value),
{
    fn trace(&mut self, expr: &ExpressionSegment, result: &Value) {
        self(expr, result)
    }
}


================================================
FILE: extension/src/time_vector/pipeline/lambda/lambda_expr.pest
================================================
// pest grammar for lambda expressions; rules starting with `_` are silent
// (produce no parse-tree node), `@` rules are atomic (no inner whitespace).
calculation = _{ SOI ~ let_expr ~ EOI }

let_expr = { ("let" ~ var ~ "=" ~ tuple ~ ";")* ~ tuple }

tuple = { binops ~ ("," ~ binops)* }

binops = { unary ~ (operation ~ unary)* }

unary = _{ neg | not | term }
neg = { "-" ~ unary }
not = { ^"not" ~ unary }

term = _{ val_var | time_var | var | time | interval | num | function | "(" ~ let_expr ~ ")" }

// trailing comma in the argument list is permitted
function = { function_name ~ "(" ~ (binops ~ ("," ~ binops)* ~ ","?)?
~ ")" }

operation = _{ add | subtract | multiply | divide | power | eq | neq | le | ge | lt | gt | and | or }
add = { "+" }
subtract = { "-" }
multiply = { "*" }
divide = { "/" }
power = { "^" }
eq = { "=" }
neq = { "!=" | "<>" }
lt = { "<" }
le = { "<=" }
gt = { ">" }
ge = { ">=" }
and = { ^"and" }
or = { ^"or" }

num = @{ int ~ ("." ~ ASCII_DIGIT*)? ~ (^"e" ~ int)? }
int = { ("+" | "-")? ~ ASCII_DIGIT+ }

time_var = @{ ^"$time" }
val_var = @{ ^"$value" }

// single-quoted string followed by a type marker: `'...'t` is a timestamptz,
// `'...'i` is an interval
time = @{ string ~ "t" }
interval = @{ string ~ "i" }
string = _{ "'" ~ (!"'" ~ ANY)* ~ "'" }

var = @{ "$" ~ (ASCII_ALPHANUMERIC | "_")+ }
function_name = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }

WHITESPACE = _{ " " | "\t" | NEWLINE }


================================================
FILE: extension/src/time_vector/pipeline/lambda/parser.rs
================================================
use std::{collections::HashMap, ffi::CString};

use pgrx::*;

use super::*;

use pest::{
    iterators::{Pair, Pairs},
    prec_climber::{Assoc, Operator, PrecClimber},
    Parser,
};

use ExpressionSegment::*;
use Rule::*;
use Type::*;
use UnaryOp::*;

// Idealized expression grammar ignoring precedence
// ```
// Expression := 'let' Variable '=' Expression ';' Expression | BinaryExpression
// BinaryExpression := PrefixExpression ({',', '+', '-', '*', ...} BinaryExpression)
// PrefixExpression := {'-', 'NOT'} ParenExpression
// ParenExpression := '(' Expression ')' | Variable | Literal
// Variable := $[a-bA-B_][a-bA-B0-9_]*
// Literal := <number> | '<string>'
// ```
// Josh - I believe this is unambiguous and LL(1), but we should check before
//        stabilization
// FIXME check the grammar
#[derive(pest_derive::Parser)]
#[grammar = "time_vector/pipeline/lambda/lambda_expr.pest"] // relative to src
pub struct ExpressionParser;

/// Parse a lambda source string into a typed `Expression`.
/// Panics (reporting the pest error) on any syntax or type error.
pub fn parse_expression(input: &str) -> Expression {
    let parsed = ExpressionParser::parse(calculation, input).unwrap_or_else(|e| panic!("{}", e));
    let mut variables = Vec::new();
    let expr = build_expression(parsed, &mut variables, &mut HashMap::new());
    Expression { variables, expr }
}

// main parsing function.
fn build_expression<'a>(
    parsed: Pairs<'a, Rule>,
    var_expressions: &mut Vec<ExpressionSegment>,
    known_vars: &mut HashMap<&'a str, (Type, usize)>,
) -> ExpressionSegment {
    // Everything except binary operations are handled by `parse_primary()`
    // when we encounter a sequence of binary operations eg `<> + <> * <>`
    // the `(Expression, op, Expression)` triple is passed to `build_binary_op()`
    // in descending precedence order.
    PREC_CLIMBER.climb(
        parsed,
        |pair| parse_primary(pair, var_expressions, known_vars),
        |left: ExpressionSegment, op: Pair<Rule>, right: ExpressionSegment| {
            build_binary_op(op, left, right)
        },
    )
}

// handles everything except infix binary operators, which are handled by the
// precedence climber and `build_binary_op()`
fn parse_primary<'a>(
    pair: Pair<'a, Rule>,
    var_expressions: &mut Vec<ExpressionSegment>,
    known_vars: &mut HashMap<&'a str, (Type, usize)>,
) -> ExpressionSegment {
    // HOW TO READ:
    // every rule (the left hand side of the `=` in the `.pest` file) has a
    // variant in the following `match` statement. When seeing a rule like
    // ```
    // foo = { bar ~ "baz" ~ qux }
    // ```
    //  1. `pair.as_str()` will be the entire string that matched the rule.
    //  2. `pair.into_iterator()` returns an iterator over the `Pair`s
    //     representing the sub rules. In this case it'll return two, one for
    //     `bar` and one for `qux`. These 'Pair's can be passed back to
    //     `parse_primary()` to parse them into `Expression`s for further
    //     handling.
    match pair.as_rule() {
        num => {
            let val: f64 = pair.as_str().parse().unwrap();
            DoubleConstant(val)
        }
        val_var => ValueVar,
        time_var => TimeVar,
        time => {
            // strip the leading `'` and the trailing `'t` type marker
            let s = pair.as_str();
            let parsed_time = parse_timestamptz(&s[1..s.len() - 2]);
            TimeConstant(parsed_time)
        }
        interval => {
            // strip the leading `'` and the trailing `'i` type marker
            let s = pair.as_str();
            let parsed_interval = parse_interval(&s[1..s.len() - 2]);
            IntervalConstant(parsed_interval)
        }
        var => {
            let (ty, v) = known_vars
                .get(pair.as_str())
                .unwrap_or_else(|| panic!("unknown variable: {}", pair.as_str()))
                .clone();
            UserVar(v, ty)
        }
        function => {
            let mut pairs = pair.into_inner();
            let func_name = pairs.next().unwrap();
            let (num_args, func_id) = *BUILTIN_FUNCTION
                .get(func_name.as_str())
                .unwrap_or_else(|| panic!("unknown function: {}", func_name.as_str()));
            let args: Vec<_> = pairs
                .map(|p| parse_primary(p, var_expressions, known_vars))
                .collect();
            // arity is checked here; argument types are checked at execution
            if args.len() != num_args {
                panic!(
                    "function `{}` expects {} arguments and received {}",
                    func_name.as_str(),
                    num_args,
                    args.len(),
                )
            }
            FunctionCall(func_id, args)
        }
        neg => {
            let value = pair.into_inner().next().unwrap();
            let value = parse_primary(value, var_expressions, known_vars);
            if value.ty() != &Double {
                panic!("can only apply `-` to a DOUBLE PRECISION")
            }
            Unary(Negative, value.into(), Double)
        }
        not => {
            let value = pair.into_inner().next().unwrap();
            let value = parse_primary(value, var_expressions, known_vars);
            if value.ty() != &Bool {
                panic!("can only apply NOT to a BOOLEAN")
            }
            Unary(Not, value.into(), Bool)
        }
        // pass the sequence of binary operation to the precedence_climber to handle
        binops => build_expression(pair.into_inner(), var_expressions, known_vars),
        let_expr => {
            let mut pairs = pair.into_inner();
            loop {
                // let_expr has two forms
                // `let <var> = <expr>; <let_expr>` and `<expr>`
                // if we have more than one sub-pair in our pairs then we know we're
                // in the first state, otherwise we must be in the second.
                let var_name_or_expr = pairs.next().unwrap();
                let var_value = match pairs.next() {
                    None => return parse_primary(var_name_or_expr, var_expressions, known_vars),
                    Some(val) => val,
                };
                let var_value = parse_primary(var_value, var_expressions, known_vars);
                let var_name = var_name_or_expr.as_str();
                // re-declaring a variable name is an error
                known_vars
                    .entry(var_name)
                    .and_modify(|_| panic!("duplicate var {var_name}"))
                    .or_insert_with(|| (var_value.ty().clone(), var_expressions.len()));
                var_expressions.push(var_value);
            }
        }
        tuple => {
            // the tuple rule effectively has two forms
            // `<expr>` and `<expr> (, <expr>)+`
            // it's only in the second case that we'll actually build something
            // of a tuple type, in the former we'll just turn into the inner
            // expression.
            let mut pairs = pair.into_inner();
            let first = pairs.next().unwrap();
            let first_val = parse_primary(first, var_expressions, known_vars);
            match pairs.next() {
                None => first_val,
                Some(pair) => {
                    let mut vals = vec![first_val];
                    let val = parse_primary(pair, var_expressions, known_vars);
                    vals.push(val);
                    for p in pairs {
                        let val = parse_primary(p, var_expressions, known_vars);
                        vals.push(val);
                    }
                    let ty = Tuple(vals.iter().map(|v| v.ty().clone()).collect());
                    BuildTuple(vals, ty)
                }
            }
        }
        // operations marked with a `_` or that are below a `@` are never passed
        // to us, so we can ignore them.
        EOI | int | operation | string | unary | term | function_name | WHITESPACE
        | calculation => unreachable!("{} should be transparent", pair),
        // infix operations should be passed to `build_binary_op()` by the
        // precedence climber, so we should never see them here.
        add | subtract | multiply | divide | power | eq | neq | lt | le | gt | ge | and | or => {
            unreachable!("{} should be handled by precedence climbing", pair)
        }
    }
}

// type-check one infix operation and build its `Binary` node
fn build_binary_op(
    op: Pair<Rule>,
    left: ExpressionSegment,
    right: ExpressionSegment,
) -> ExpressionSegment {
    use BinOp::*;
    use Type::Interval;

    // matches the operand types against the allowed combinations, yielding
    // the result type, or panics with a message listing the valid signatures
    macro_rules! return_ty {
        ($op:literal $(($l: pat, $r:pat) => $ty:expr),+ $(,)?) => {
            match (left.ty(), right.ty()) {
                $(($l, $r) => $ty,)+
                // TODO the error should report the location
                (l, r) => panic!(
                    concat!("no operator `{:?} {op} {:?}` only ",
                        $("`", stringify!($l), " {op} ", stringify!($r), "` ",)+
                    ),
                    l, r, op=$op),
            }
        };
    }

    match op.as_rule() {
        add => {
            let result_type = return_ty!("+"
                (Double, Double) => Double,
                (Type::Time, Interval) => Type::Time,
                (Interval, Interval) => Interval,
            );
            Binary(Plus, left.into(), right.into(), result_type)
        }
        subtract => {
            let result_type = return_ty!("-"
                (Double, Double) => Double,
                (Type::Time, Interval) => Type::Time,
                (Interval, Interval) => Interval,
            );
            Binary(Minus, left.into(), right.into(), result_type)
        }
        multiply => match (left.ty(), right.ty()) {
            (Double, Double) => Binary(Mul, left.into(), right.into(), Double),
            (Interval, Double) => Binary(Mul, left.into(), right.into(), Interval),
            // TODO right now BinOp(Mul, .., Interval) expects the interval on the left
            //      and the double on the left. We could check in the executor which one
            //      actually is, but it seems easier to just revers the value here if
            //      they're in an unexpected order.
            (Double, Interval) => Binary(Mul, right.into(), left.into(), Interval),
            (l, r) => {
                panic!("no operator `{l:?} * {r:?}` only `DOUBLE * DOUBLE` and `INTERVAL * FLOAT`")
            }
        },
        divide => {
            let result_type = return_ty!("/"
                (Double, Double) => Double,
                (Interval, Double) => Interval,
            );
            Binary(Div, left.into(), right.into(), result_type)
        }
        power => {
            let result_type = return_ty!("^"
                (Double, Double) => Double,
            );
            Binary(Pow, left.into(), right.into(), result_type)
        }
        // comparison operators require equal types on both sides
        eq => {
            if left.ty() != right.ty() {
                panic!(
                    "mismatched types for `=`: {:?}, {:?}",
                    left.ty(),
                    right.ty()
                )
            }
            Binary(Eq, left.into(), right.into(), Bool)
        }
        neq => {
            if left.ty() != right.ty() {
                panic!(
                    "mismatched types for `!=`: {:?}, {:?}",
                    left.ty(),
                    right.ty()
                )
            }
            Binary(Neq, left.into(), right.into(), Bool)
        }
        lt => {
            if left.ty() != right.ty() {
                panic!(
                    "mismatched types for `<`: {:?}, {:?}",
                    left.ty(),
                    right.ty()
                )
            }
            Binary(Lt, left.into(), right.into(), Bool)
        }
        le => {
            if left.ty() != right.ty() {
                panic!(
                    "mismatched types for `<=`: {:?}, {:?}",
                    left.ty(),
                    right.ty()
                )
            }
            Binary(Le, left.into(), right.into(), Bool)
        }
        gt => {
            if left.ty() != right.ty() {
                panic!(
                    "mismatched types for `>`: {:?}, {:?}",
                    left.ty(),
                    right.ty()
                )
            }
            Binary(Gt, left.into(), right.into(), Bool)
        }
        ge => {
            if left.ty() != right.ty() {
                panic!(
                    "mismatched types for `>=`: {:?}, {:?}",
                    left.ty(),
                    right.ty()
                )
            }
            Binary(Ge, left.into(), right.into(), Bool)
        }
        and => {
            let result_type = return_ty!("and"
                (Bool, Bool) => Bool,
            );
            Binary(And, left.into(), right.into(), result_type)
        }
        or => {
            let result_type = return_ty!("or"
                (Bool, Bool) => Bool,
            );
            Binary(Or, left.into(), right.into(), result_type)
        }
        _ => unreachable!(),
    }
}

// parse a timestamptz literal by delegating to postgres' own input function
fn parse_timestamptz(val: &str) -> i64 {
    // FIXME pgrx wraps all functions in rust wrappers, which makes them
    //       uncallable with DirectFunctionCall(). Is there a way to
    //       export both?
    unsafe extern "C-unwind" {
        #[allow(improper_ctypes)]
        fn timestamptz_in(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
    }
    let cstr = CString::new(val).unwrap();
    let parsed_time = unsafe {
        pg_sys::DirectFunctionCall3Coll(
            Some(timestamptz_in),
            pg_sys::InvalidOid as _,
            pg_sys::Datum::from(cstr.as_ptr()),
            pg_sys::Datum::from(pg_sys::InvalidOid),
            pg_sys::Datum::from(-1i32),
        )
    };
    parsed_time.value() as _
}

// parse an interval literal by delegating to postgres' own input function;
// the returned pointer is palloc'd by postgres
fn parse_interval(val: &str) -> *mut pg_sys::Interval {
    // FIXME pgrx wraps all functions in rust wrappers, which makes them
    //       uncallable with DirectFunctionCall(). Is there a way to
    //       export both?
    unsafe extern "C-unwind" {
        #[allow(improper_ctypes)]
        fn interval_in(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum;
    }
    let cstr = CString::new(val).unwrap();
    let parsed_interval = unsafe {
        pg_sys::DirectFunctionCall3Coll(
            Some(interval_in),
            pg_sys::InvalidOid as _,
            pg_sys::Datum::from(cstr.as_ptr()),
            pg_sys::Datum::from(pg_sys::InvalidOid),
            pg_sys::Datum::from(-1i32),
        )
    };
    parsed_interval.cast_mut_ptr()
}

// This static determines the precedence of infix operators
static PREC_CLIMBER: once_cell::sync::Lazy<PrecClimber<Rule>> =
    once_cell::sync::Lazy::new(|| {
        use Assoc::*;
        // operators according to their precedence, ordered in a vector
        // from lowest to highest. Multiple operators with the same precedence are
        // joined with `|`
        PrecClimber::new(vec![
            Operator::new(or, Left),
            Operator::new(and, Left),
            Operator::new(eq, Left)
                | Operator::new(neq, Left)
                | Operator::new(lt, Left)
                | Operator::new(le, Left)
                | Operator::new(gt, Left)
                | Operator::new(ge, Left),
            Operator::new(add, Left) | Operator::new(subtract, Left),
            Operator::new(multiply, Left) | Operator::new(divide, Left),
            Operator::new(power, Right),
        ])
    });

// Table of builtin functions (all of them for now).
// Maps function name to a tuple (num arguments, function identifier) static BUILTIN_FUNCTION: once_cell::sync::Lazy> = once_cell::sync::Lazy::new(|| { use Function::*; [ ("abs", (1, Abs)), ("cbrt", (1, Cbrt)), ("ceil", (1, Ceil)), ("floor", (1, Floor)), ("ln", (1, Ln)), ("log10", (1, Log10)), ("log", (2, Log)), ("pi", (0, Pi)), ("round", (1, Round)), ("sign", (1, Sign)), ("sqrt", (1, Sqrt)), ("trunc", (1, Trunc)), ("acos", (1, Acos)), ("asin", (1, Asin)), ("atan", (1, Atan)), ("atan2", (2, Atan2)), ("cos", (1, Cos)), ("sin", (1, Sin)), ("tan", (1, Tan)), ("sinh", (1, Sinh)), ("cosh", (1, Cosh)), ("tanh", (1, Tanh)), ("asinh", (1, Asinh)), ("acosh", (1, Acosh)), ("atanh", (1, Atanh)), ] .into_iter() .collect() }); ================================================ FILE: extension/src/time_vector/pipeline/lambda.rs ================================================ use std::borrow::Cow; use pgrx::{ iter::{SetOfIterator, TableIterator}, *, }; use super::*; pub use executor::ExpressionExecutor; mod executor; mod parser; pub use self::toolkit_experimental::{Lambda, LambdaData}; #[pg_schema] pub mod toolkit_experimental { pub(crate) use super::*; // // lambda type // pg_type! { #[derive(Debug)] struct Lambda<'input> { len: u32, string: [u8; self.len], } } } impl<'input> InOutFuncs for Lambda<'input> { fn output(&self, buffer: &mut StringInfo) { use crate::serialization::{str_to_db_encoding, EncodedStr::*}; let stringified = std::str::from_utf8(self.string.as_slice()).unwrap(); match str_to_db_encoding(stringified) { Utf8(s) => buffer.push_str(s), Other(s) => buffer.push_bytes(s.to_bytes()), } } fn input(input: &std::ffi::CStr) -> Self where Self: Sized, { use crate::serialization::str_from_db_encoding; let s = str_from_db_encoding(input); // validate the string let _ = parser::parse_expression(s); unsafe { flatten! 
{ Lambda { len: s.len() as _, string: s.as_bytes().into(), } } } } } impl<'a> LambdaData<'a> { pub fn parse(&self) -> Expression { parser::parse_expression(std::str::from_utf8(self.string.as_slice()).unwrap()) } } // // Direct lambda execution functions for testing // #[pg_extern(stable, parallel_safe, schema = "toolkit_experimental")] pub fn bool_lambda<'a>( lambda: toolkit_experimental::Lambda<'a>, time: crate::raw::TimestampTz, value: f64, ) -> bool { let expression = lambda.parse(); if expression.expr.ty() != &Type::Bool { panic!("invalid return type, must return a BOOLEAN for {expression:?}") } let mut executor = ExpressionExecutor::new(&expression); executor.exec(value, time.into()).bool() } #[pg_extern(stable, parallel_safe, schema = "toolkit_experimental")] pub fn f64_lambda<'a>( lambda: toolkit_experimental::Lambda<'a>, time: crate::raw::TimestampTz, value: f64, ) -> f64 { let expression = lambda.parse(); if expression.expr.ty() != &Type::Double { panic!("invalid return type, must return a DOUBLE PRECISION") } let mut executor = ExpressionExecutor::new(&expression); executor.exec(value, time.into()).float() } #[pg_extern(stable, parallel_safe, schema = "toolkit_experimental")] pub fn ttz_lambda<'a>( lambda: toolkit_experimental::Lambda<'a>, time: crate::raw::TimestampTz, value: f64, ) -> crate::raw::TimestampTz { let expression = lambda.parse(); if expression.expr.ty() != &Type::Time { panic!("invalid return type, must return a TimestampTZ") } let mut executor = ExpressionExecutor::new(&expression); executor.exec(value, time.into()).time().into() } use crate::raw::Interval; #[pg_extern(stable, parallel_safe, schema = "toolkit_experimental")] pub fn interval_lambda<'a>( lambda: toolkit_experimental::Lambda<'a>, time: crate::raw::TimestampTz, value: f64, ) -> Interval { let expression = lambda.parse(); if expression.expr.ty() != &Type::Interval { panic!("invalid return type, must return a INTERVAL") } let mut executor = ExpressionExecutor::new(&expression); 
pg_sys::Datum::from(executor.exec(value, time.into()).interval()).into() } #[pg_extern(stable, parallel_safe, schema = "toolkit_experimental")] pub fn point_lambda<'a>( lambda: toolkit_experimental::Lambda<'a>, time: crate::raw::TimestampTz, value: f64, ) -> TableIterator<'static, (name!(time, crate::raw::TimestampTz), name!(value, f64))> { let expression = lambda.parse(); if !expression.expr.ty_is_ts_point() { panic!("invalid return type, must return a (TimestampTZ, DOUBLE PRECISION)") } let mut executor = ExpressionExecutor::new(&expression); let columns = match executor.exec(value, time.into()) { Value::Tuple(columns) => columns, _ => unreachable!(), }; TableIterator::new(Some((columns[0].time().into(), columns[1].float())).into_iter()) } #[pg_extern(stable, parallel_safe, schema = "toolkit_experimental")] pub fn trace_lambda<'a>( lambda: toolkit_experimental::Lambda<'a>, time: crate::raw::TimestampTz, value: f64, ) -> SetOfIterator<'static, String> { let expression = lambda.parse(); let mut trace: Vec<_> = vec![]; let mut executor = ExpressionExecutor::with_fn_tracer(&expression, |e, v| { trace.push((e.name(), format!("{v:?}"))) }); let _ = executor.exec(value, time.into()); let col1_size = trace.iter().map(|(e, _)| e.len()).max().unwrap_or(0); SetOfIterator::new( trace .into_iter() .map(move |(e, v)| format!("{e:>col1_size$}: {v:?}")), ) } // // Common types across the parser and executor // // expressions #[derive(Debug)] pub struct Expression { variables: Vec, expr: ExpressionSegment, } #[derive(Clone, Debug)] pub enum ExpressionSegment { ValueVar, TimeVar, DoubleConstant(f64), TimeConstant(i64), IntervalConstant(*mut pg_sys::Interval), UserVar(usize, Type), Unary(UnaryOp, Box, Type), Binary(BinOp, Box, Box, Type), FunctionCall(Function, Vec), BuildTuple(Vec, Type), } #[derive(Clone, Copy, Debug)] pub enum UnaryOp { Not, Negative, } #[derive(Clone, Copy, Debug)] pub enum BinOp { Plus, Minus, Mul, Div, Pow, Eq, Lt, Le, Gt, Ge, Neq, And, Or, } #[derive(Clone, 
Copy, Debug)] pub enum Function { Abs, Cbrt, Ceil, Floor, Ln, Log10, Log, Pi, Round, Sign, Sqrt, Trunc, Acos, Asin, Atan, Atan2, Cos, Sin, Tan, Sinh, Cosh, Tanh, Asinh, Acosh, Atanh, } // types #[derive(Clone, Debug, PartialEq, Eq)] pub enum Type { Time, Double, Bool, Interval, Tuple(Vec), } // values #[derive(Clone, Debug)] pub enum Value { Bool(bool), Double(f64), Time(i64), Interval(*mut pg_sys::Interval), Tuple(Vec), } impl Expression { pub fn ty(&self) -> &Type { self.expr.ty() } pub fn ty_is_ts_point(&self) -> bool { self.expr.ty_is_ts_point() } } impl ExpressionSegment { pub fn ty(&self) -> &Type { use ExpressionSegment::*; use Type::*; match self { ValueVar => &Double, TimeVar => &Time, DoubleConstant(_) => &Double, TimeConstant(_) => &Time, IntervalConstant(_) => &Interval, UserVar(_, ty) => ty, FunctionCall(_, _) => &Double, Unary(_, _, ty) => ty, Binary(_, _, _, ty) => ty, BuildTuple(_, ty) => ty, } } pub fn ty_is_ts_point(&self) -> bool { let columns = match self { ExpressionSegment::BuildTuple(_, Type::Tuple(ty)) => ty, _ => return false, }; matches!(&**columns, [Type::Time, Type::Double]) } pub fn name(&self) -> Cow<'static, str> { use ExpressionSegment::*; match self { ValueVar => "$value".into(), TimeVar => "$time".into(), DoubleConstant(_) => "f64 const".into(), TimeConstant(_) => "time const".into(), IntervalConstant(_) => "interval const".into(), UserVar(i, t) => format!("user var {i}: {t:?}").into(), Unary(op, _, t) => format!("uop {op:?} {t:?}").into(), Binary(op, _, _, t) => format!("binop {op:?} {t:?}").into(), FunctionCall(f, _) => format!("function {f:?}").into(), BuildTuple(_, t) => format!("tuple {t:?}").into(), } } } impl Value { pub(crate) fn bool(&self) -> bool { match self { Value::Bool(b) => *b, _ => unreachable!(), } } pub(crate) fn float(&self) -> f64 { match self { Value::Double(f) => *f, _ => unreachable!(), } } pub(crate) fn time(&self) -> i64 { match self { Value::Time(t) => *t, _ => unreachable!(), } } pub(crate) fn 
interval(&self) -> *mut pg_sys::Interval { match self { Value::Interval(i) => *i, _ => unreachable!(), } } } impl PartialOrd for Value { fn partial_cmp(&self, other: &Self) -> Option { use std::mem::discriminant; use Value::*; // XXX `NodeTag` somewhere inside `pg_sys::FunctionCallInfo` triggers // `improper_ctypes` lint. The `pgrx` author explains the issue in // details here: // // https://github.com/rust-lang/rust/issues/116831 // // For now it seems OK to suppress these warnings here and below with // #[allow(improper_ctypes)] unsafe extern "C-unwind" { #[allow(improper_ctypes)] fn interval_cmp(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum; } if discriminant(self) != discriminant(other) { return None; } match (self, other) { (Bool(l0), Bool(r0)) => l0.partial_cmp(r0), (Double(l0), Double(r0)) => l0.partial_cmp(r0), (Time(l0), Time(r0)) => l0.partial_cmp(r0), (Tuple(l0), Tuple(r0)) => l0.partial_cmp(r0), (Interval(l0), Interval(r0)) => unsafe { let res = pg_sys::DirectFunctionCall2Coll( Some(interval_cmp), pg_sys::InvalidOid, pg_sys::Datum::from(*l0), pg_sys::Datum::from(*r0), ) .value() as i32; res.cmp(&0).into() }, (_, _) => None, } } } impl PartialEq for Value { fn eq(&self, other: &Self) -> bool { use std::mem::discriminant; use Value::*; unsafe extern "C-unwind" { #[allow(improper_ctypes)] fn interval_eq(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum; } if discriminant(self) != discriminant(other) { return false; } match (self, other) { (Bool(l0), Bool(r0)) => l0 == r0, (Double(l0), Double(r0)) => l0 == r0, (Time(l0), Time(r0)) => l0 == r0, (Tuple(l0), Tuple(r0)) => l0 == r0, (Interval(l0), Interval(r0)) => unsafe { let res = pg_sys::DirectFunctionCall2Coll( Some(interval_eq), pg_sys::InvalidOid, pg_sys::Datum::from(*l0), pg_sys::Datum::from(*r0), ); res.value() != 0 }, (_, _) => false, } } } impl From for Value { fn from(b: bool) -> Self { Self::Bool(b) } } impl From for Value { fn from(f: f64) -> Self { Self::Double(f) } } impl<'a> Lambda<'a> { 
pub fn into_data(self) -> LambdaData<'a> { self.0 } } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; macro_rules! trace_lambda { ($client: expr, $expr:literal) => { $client .update( concat!("SELECT trace_lambda($$ ", $expr, " $$, '2021-01-01', 2.0)"), None, &[], ) .unwrap() .map(|r| r.get::(1).unwrap().unwrap()) .collect() }; } macro_rules! point_lambda { ($client: expr, $expr:literal) => { $client .update( concat!( "SELECT point_lambda($$ ", $expr, " $$, '2021-01-01', 2.0)::text" ), None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap() }; } macro_rules! interval_lambda { ($client: expr, $expr:literal) => { $client .update( concat!( "SELECT interval_lambda($$ ", $expr, " $$, now(), 2.0)::text" ), None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap() }; } macro_rules! f64_lambda { ($client: expr, $expr:literal) => { $client .update( concat!("SELECT f64_lambda($$ ", $expr, " $$, now(), 2.0)"), None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap() }; } macro_rules! bool_lambda { ($client: expr, $expr:literal) => { $client .update( concat!("SELECT bool_lambda($$ ", $expr, " $$, now(), 2.0)::text"), None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap() }; } macro_rules! point_lambda_eq { ($client: expr, $expr:literal, $expects:literal) => { assert_eq!(point_lambda!($client, $expr), $expects,) }; } macro_rules! interval_lambda_eq { ($client: expr, $expr:literal, $expects:literal) => { assert_eq!(interval_lambda!($client, $expr), $expects,) }; } macro_rules! f64_lambda_eq { ($client: expr, $expr:literal, $expects:expr) => { assert!((f64_lambda!($client, $expr) - ($expects)).abs() < f64::EPSILON,) }; } macro_rules! 
bool_lambda_eq { ($client: expr, $expr:literal, $expects:literal) => { assert_eq!(bool_lambda!($client, $expr), $expects,) }; } #[pg_test] fn test_lambda_general() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "SELECT $$ let $1 = 1.0; 2.0, $1 $$::toolkit_experimental.lambda", None, &[], ) .unwrap(); // client.update("SELECT $$ '1 day'i $$::toolkit_experimental.lambda", None, &[]).unwrap(); // client.update("SELECT $$ '2020-01-01't $$::toolkit_experimental.lambda", None, &[]).unwrap(); let res = client .update("SELECT f64_lambda($$ 1.0 $$, now(), 0.0)::text", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "1"); let res = client .update( "SELECT f64_lambda($$ 1.0 + 1.0 $$, now(), 0.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "2"); let res = client .update( "SELECT f64_lambda($$ 1.0 - 1.0 $$, now(), 0.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "0"); let res = client .update( "SELECT f64_lambda($$ 2.0 * 3.0 $$, now(), 0.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "6"); let res = client .update( "SELECT f64_lambda($$ $value + 3.0 $$, now(), 2.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "5"); let res = client .update( "SELECT f64_lambda($$ 3.0 - 1.0 * 3.0 $$, now(), 2.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "0"); bool_lambda_eq!(client, "3.0 = 3.0", "true"); bool_lambda_eq!(client, "3.0 != 
3.0", "false"); bool_lambda_eq!(client, "2.0 != 3.0", "true"); bool_lambda_eq!(client, "2.0 != 3.0 and 1 = 1", "true"); bool_lambda_eq!(client, "2.0 != 3.0 and (1 = 1)", "true"); let res = client .update( "SELECT ttz_lambda($$ '2020-11-22 13:00:01't $$, now(), 2.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "2020-11-22 13:00:01+00"); let res = client .update( "SELECT ttz_lambda($$ $time $$, '1930-01-12 14:20:21', 2.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "1930-01-12 14:20:21+00"); let res = client .update( "SELECT ttz_lambda($$ '2020-11-22 13:00:01't - '1 day'i $$, now(), 2.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "2020-11-21 13:00:01+00"); let res = client .update( "SELECT ttz_lambda($$ '2020-11-22 13:00:01't + '1 day'i $$, now(), 2.0)::text", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(&*res.unwrap(), "2020-11-23 13:00:01+00"); point_lambda_eq!( client, "'2020-11-22 13:00:01't + '1 day'i, 2.0 * 3.0", r#"("2020-11-23 13:00:01+00",6)"# ); point_lambda_eq!( client, "($time, $value^2 + $value * 2.3 + 43.2)", r#"("2021-01-01 00:00:00+00",51.800000000000004)"# ); }); } #[pg_test] fn test_lambda_comparison() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); bool_lambda_eq!(client, "2.0 < 3.0", "true"); bool_lambda_eq!(client, "2.0 <= 3.0", "true"); bool_lambda_eq!(client, "2.0 > 3.0", "false"); bool_lambda_eq!(client, "2.0 >= 3.0", "false"); bool_lambda_eq!(client, "4.0 > 3.0", "true"); bool_lambda_eq!(client, "4.0 >= 3.0", "true"); bool_lambda_eq!(client, "4.0 > 4.0", "false"); bool_lambda_eq!(client, "4.0 >= 4.0", "true"); bool_lambda_eq!(client, "'2020-01-01't < '2021-01-01't", "true"); bool_lambda_eq!(client, "'2020-01-01't <= '2021-01-01't", "true"); bool_lambda_eq!(client, "'2020-01-01't > '2021-01-01't", "false"); bool_lambda_eq!(client, "'2020-01-01't >= '2021-01-01't", "false"); bool_lambda_eq!(client, "'2022-01-01't < '2021-01-01't", "false"); bool_lambda_eq!(client, "'2022-01-01't <= '2021-01-01't", "false"); bool_lambda_eq!(client, "'2022-01-01't > '2021-01-01't", "true"); bool_lambda_eq!(client, "'2022-01-01't >= '2021-01-01't", "true"); bool_lambda_eq!(client, "'2022-01-01't > '2021-01-01't", "true"); bool_lambda_eq!(client, "'2022-01-01't >= '2021-01-01't", "true"); bool_lambda_eq!(client, "'1 day'i < '1 week'i", "true"); bool_lambda_eq!(client, "'1 day'i <= '1 week'i", "true"); bool_lambda_eq!(client, "'1 day'i > '1 week'i", "false"); bool_lambda_eq!(client, "'1 day'i >= '1 week'i ", "false"); bool_lambda_eq!(client, "'1 year'i > '1 week'i", "true"); bool_lambda_eq!(client, "'1 year'i >= '1 week'i", "true"); bool_lambda_eq!(client, "'1 year'i > '1 year'i", "false"); bool_lambda_eq!(client, "'1 year'i >= '1 year'i", "true"); }); } #[pg_test] fn test_lambda_function() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); f64_lambda_eq!(client, "pi()", std::f64::consts::PI); f64_lambda_eq!(client, "abs(-2.0)", (-2.0f64).abs()); f64_lambda_eq!(client, "cbrt(-2.0)", (-2.0f64).cbrt()); f64_lambda_eq!(client, "ceil(-2.1)", (-2.1f64).ceil()); f64_lambda_eq!(client, "floor(-2.1)", (-2.1f64).floor()); f64_lambda_eq!(client, "ln(2.0)", (2.0f64).ln()); f64_lambda_eq!(client, "log10(2.0)", (2.0f64).log10()); f64_lambda_eq!(client, "round(-2.1)", (-2.1f64).round()); f64_lambda_eq!(client, "sign(-2.0)", (-2.0f64).signum()); f64_lambda_eq!(client, "sqrt(2.0)", (2.0f64).sqrt()); f64_lambda_eq!(client, "trunc(-2.0)", (-2.0f64).trunc()); f64_lambda_eq!(client, "acos(0.2)", (0.2f64).acos()); f64_lambda_eq!(client, "asin(0.2)", (0.2f64).asin()); f64_lambda_eq!(client, "atan(0.2)", (0.2f64).atan()); f64_lambda_eq!(client, "cos(2.0)", (2.0f64).cos()); f64_lambda_eq!(client, "sin(2.0)", (2.0f64).sin()); f64_lambda_eq!(client, "tan(2.0)", (2.0f64).tan()); f64_lambda_eq!(client, "sinh(2.0)", (2.0f64).sinh()); f64_lambda_eq!(client, "cosh(2.0)", (2.0f64).cosh()); f64_lambda_eq!(client, "tanh(2.0)", (2.0f64).tanh()); f64_lambda_eq!(client, "asinh(1.0)", (1.0f64).asinh()); f64_lambda_eq!(client, "acosh(1.0)", (1.0f64).acosh()); f64_lambda_eq!(client, "atanh(0.9)", (0.9f64).atanh()); f64_lambda_eq!(client, "log(2.0, 10)", 2.0f64.log(10.0)); f64_lambda_eq!(client, "atan2(2.0, 10)", 2.0f64.atan2(10.0)); }); } #[pg_test] fn test_lambda_unary() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); f64_lambda_eq!(client, "-(2.0)", -2.0f64); f64_lambda_eq!(client, "-(-2.0)", 2.0f64); bool_lambda_eq!(client, "not (1 = 1)", "false"); bool_lambda_eq!(client, "not (1 = 2)", "true"); bool_lambda_eq!(client, "not not (1 = 1)", "true"); bool_lambda_eq!(client, "not not (1 = 2)", "false"); bool_lambda_eq!(client, "not (1 <> 1)", "true"); bool_lambda_eq!(client, "not (1 <> 2)", "false"); }); } #[pg_test] fn test_lambda_interval_ops() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); interval_lambda_eq!(client, "'1 day'i + '1 day'i", "2 days"); interval_lambda_eq!(client, "'1 day'i + '1 week'i", "8 days"); interval_lambda_eq!(client, "'1 week'i - '1 day'i", "6 days"); interval_lambda_eq!(client, "'1 day'i * 3", "3 days"); interval_lambda_eq!(client, "4 * '1 day'i", "4 days"); interval_lambda_eq!(client, "'4 day'i / 4", "1 day"); }); } #[pg_test] fn test_lambda_variable() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); f64_lambda_eq!(client, "let $foo = 2.0; $foo", 2.0); f64_lambda_eq!(client, "let $foo = -2.0; $foo", -2.0); f64_lambda_eq!(client, "let $foo = abs(-2.0); $foo", 2.0); f64_lambda_eq!(client, "let $foo = abs(-2.0); $foo * $foo", 4.0); bool_lambda_eq!(client, "let $foo = 1 = 1; $foo", "true"); bool_lambda_eq!(client, "let $foo = 1 = 1; $foo and $foo", "true"); bool_lambda_eq!(client, "let $foo = 1 = 1; $foo or $foo", "true"); // verify that variables are only expanded once let rows: Vec<_> = trace_lambda!(client, "let $bar = 1 + 1; $bar + $bar + $bar"); assert_eq!( &*rows, [ r#" f64 const: "Double(1.0)""#, r#" f64 const: "Double(1.0)""#, r#" binop Plus Double: "Double(2.0)""#, r#"user var 0: Double: "Double(2.0)""#, r#"user var 0: Double: "Double(2.0)""#, r#" binop Plus Double: "Double(4.0)""#, r#"user var 0: Double: "Double(2.0)""#, r#" binop Plus Double: "Double(6.0)""#, ], ); let rows: Vec<_> = trace_lambda!( client, "let $foo = -2;\nlet $bar = $foo * $foo;\n $bar * $bar" ); assert_eq!( &*rows, [ // TODO try and fix parsing so than `-2` parses as a constant `-2` r#" f64 const: "Double(2.0)""#, r#"uop Negative Double: "Double(-2.0)""#, r#" user var 0: Double: "Double(-2.0)""#, r#" user var 0: Double: "Double(-2.0)""#, r#" binop Mul Double: "Double(4.0)""#, r#" user var 1: Double: "Double(4.0)""#, r#" user var 1: Double: "Double(4.0)""#, r#" binop Mul Double: "Double(16.0)""#, ], ); }); } } ================================================ FILE: extension/src/time_vector/pipeline/map.rs ================================================ use std::{ mem::{self, ManuallyDrop, MaybeUninit}, ptr, }; use pgrx::*; use super::*; use crate::serialization::PgProcId; // TODO is (stable, parallel_safe) correct? 
/// SQL-visible `map(lambda)` constructor for a timevector pipeline element.
///
/// Parses and type-checks the lambda at construction time: it must return
/// either a DOUBLE PRECISION or a (TimestampTZ, DOUBLE PRECISION) tuple,
/// since only those can be mapped back onto a timevector point. The
/// lambda's serialized form is then stored in a `MapLambda` element.
#[pg_extern(
    immutable,
    parallel_safe,
    name = "map",
    schema = "toolkit_experimental"
)]
pub fn map_lambda_pipeline_element<'l>(
    lambda: toolkit_experimental::Lambda<'l>,
) -> toolkit_experimental::UnstableTimevectorPipeline<'static> {
    let expression = lambda.parse();
    if expression.ty() != &lambda::Type::Double && !expression.ty_is_ts_point() {
        panic!("invalid lambda type: the lambda must return a DOUBLE PRECISION or (TimestampTZ, DOUBLE PRECISION)")
    }
    Element::MapLambda {
        lambda: lambda.into_data(),
    }
    .flatten()
}

/// Applies a lambda to every point of `series` in place and returns the
/// mutated series.
///
/// The lambda's result is translated into optional (time, value) updates:
/// a bare Double updates only the value, a bare Time only the timestamp,
/// and a (Time, Double) tuple updates both. Components the lambda does not
/// produce keep the point's original field (see `map_lambda_over_series`).
pub fn apply_lambda_to<'a>(
    mut series: Timevector_TSTZ_F64<'a>,
    lambda: &lambda::LambdaData<'_>,
) -> Timevector_TSTZ_F64<'a> {
    let expression = lambda.parse();
    // `only_val` lambdas can never rewrite the timestamp.
    let only_val = expression.ty() == &lambda::Type::Double;
    if !only_val && !expression.ty_is_ts_point() {
        panic!("invalid lambda type: the lambda must return a DOUBLE PRECISION or (TimestampTZ, DOUBLE PRECISION)")
    }
    let mut executor = lambda::ExpressionExecutor::new(&expression);
    let invoke = |time: i64, value: f64| {
        use lambda::Value::*;
        // clear per-point executor state before each evaluation
        // (presumably cached variable values — see ExpressionExecutor::reset)
        executor.reset();
        let result = executor.exec(value, time);
        match result {
            Double(f) => (None, Some(f)),
            Time(t) => (Some(t), None),
            Tuple(cols) => match &*cols {
                [Time(t), Double(f)] => (Some(*t), Some(*f)),
                // the type check above guarantees a (Time, Double) tuple
                _ => unreachable!(),
            },
            _ => unreachable!(),
        }
    };
    map_lambda_over_series(&mut series, only_val, invoke);
    series
}

/// Rewrites each point of `series` through `func`.
///
/// `func` returns `(new_time, new_value)`; a `None` component falls back
/// to the point's original field. When `only_val` is set the timestamp is
/// never replaced, even if `func` returns one.
pub fn map_lambda_over_series(
    series: &mut Timevector_TSTZ_F64<'_>,
    only_val: bool,
    mut func: impl FnMut(i64, f64) -> (Option, Option),
) {
    for point in series.points.as_owned() {
        let (new_time, new_val) = func(point.ts, point.val);
        *point = TSPoint {
            ts: if only_val {
                point.ts
            } else {
                new_time.unwrap_or(point.ts)
            },
            val: new_val.unwrap_or(point.val),
        }
    }
}

/// SQL-visible `map_series(regproc)` constructor for a timevector pipeline
/// element. Delegates signature validation to `map_series_element`, then
/// flattens the element for storage.
#[pg_extern(
    stable,
    parallel_safe,
    name = "map_series",
    schema = "toolkit_experimental"
)]
pub fn map_series_pipeline_element(
    function: crate::raw::regproc,
) -> toolkit_experimental::UnstableTimevectorPipeline<'static> {
    map_series_element(crate::raw::regproc::from(function.0)).flatten()
}
/// Builds a `MapSeries` pipeline element from a user-supplied regproc,
/// after validating that the function's signature is
/// `fn(timevector) RETURNS timevector`.
pub fn map_series_element<'a>(function: crate::raw::regproc) -> Element<'a> {
    // The regproc arrives wrapped as a raw Datum; reinterpret it as an Oid.
    let function: pg_sys::regproc = pg_sys::Oid::from(function.0.value() as u32)
        .try_into()
        .unwrap();
    check_user_function_type(function);
    Element::MapSeries {
        function: PgProcId(function),
    }
}

/// Raises a PostgreSQL error unless `function` takes exactly one
/// `timevector` argument and returns a `timevector`.
pub fn check_user_function_type(function: pg_sys::regproc) {
    // Populated by get_func_signature below.
    let mut argtypes: *mut pg_sys::Oid = ptr::null_mut();
    let mut nargs: ::std::os::raw::c_int = 0;
    let rettype = unsafe { pg_sys::get_func_signature(function, &mut argtypes, &mut nargs) };
    if nargs != 1 {
        error!("invalid number of mapping function arguments, expected fn(timevector) RETURNS timevector")
    }
    // nargs == 1 implies get_func_signature produced an argtypes array.
    assert!(!argtypes.is_null());
    if unsafe { *argtypes } != *crate::time_vector::TIMEVECTOR_OID {
        error!("invalid argument type, expected fn(timevector) RETURNS timevector")
    }
    if rettype != *crate::time_vector::TIMEVECTOR_OID {
        error!("invalid return type, expected fn(timevector) RETURNS timevector")
    }
}

/// Invokes the user mapping function `func` (timevector -> timevector) on
/// `series` and returns the timevector it produced. Panics (via `expect`)
/// if the function returns NULL.
pub fn apply_to_series(
    mut series: Timevector_TSTZ_F64<'_>,
    func: pg_sys::RegProcedure,
) -> Timevector_TSTZ_F64<'_> {
    let mut flinfo: pg_sys::FmgrInfo = unsafe { MaybeUninit::zeroed().assume_init() };
    unsafe {
        pg_sys::fmgr_info(func, &mut flinfo);
    };
    unsafe {
        // use pg_sys::FunctionCall1Coll to get the pg_guard
        let res = pg_sys::FunctionCall1Coll(
            &mut flinfo,
            pg_sys::InvalidOid,
            // SAFETY the input memory context will not end in the sub-function
            // and the sub-function will allocate the returned timevector
            series.cached_datum_or_flatten(),
        );
        Timevector_TSTZ_F64::from_polymorphic_datum(res, false, pg_sys::InvalidOid)
            .expect("unexpected NULL in timevector mapping function")
    }
}

// TODO is (stable, parallel_safe) correct?
#[pg_extern( stable, parallel_safe, name = "map_data", schema = "toolkit_experimental" )] pub fn map_data_pipeline_element( function: crate::raw::regproc, ) -> toolkit_experimental::UnstableTimevectorPipeline<'static> { let mut argtypes: *mut pg_sys::Oid = ptr::null_mut(); let mut nargs: ::std::os::raw::c_int = 0; let rettype = unsafe { pg_sys::get_func_signature( pg_sys::Oid::from(function.0.value() as u32), &mut argtypes, &mut nargs, ) }; if nargs != 1 { error!("invalid number of mapping function arguments, expected fn(double precision) RETURNS double precision") } if unsafe { *argtypes } != pgrx::PgBuiltInOids::FLOAT8OID.value() { error!("invalid argument type, expected fn(double precision) RETURNS double precision") } if rettype != pgrx::PgBuiltInOids::FLOAT8OID.value() { error!("invalid return type, expected fn(double precision) RETURNS double precision") } Element::MapData { function: PgProcId(pg_sys::Oid::from(function.0.value() as u32)), } .flatten() } pub fn apply_to( mut series: Timevector_TSTZ_F64<'_>, func: pg_sys::RegProcedure, ) -> Timevector_TSTZ_F64<'_> { let mut flinfo: pg_sys::FmgrInfo = unsafe { MaybeUninit::zeroed().assume_init() }; let fn_addr: unsafe extern "C-unwind" fn( *mut pg_sys::FunctionCallInfoBaseData, ) -> pg_sys::Datum; let mut fc_info = unsafe { pg_sys::fmgr_info(func, &mut flinfo); fn_addr = flinfo.fn_addr.expect("null function in timevector map"); union FcInfo1 { data: ManuallyDrop, #[allow(dead_code)] bytes: [u8; mem::size_of::() + mem::size_of::()], } FcInfo1 { data: ManuallyDrop::new(pg_sys::FunctionCallInfoBaseData { flinfo: &mut flinfo, context: std::ptr::null_mut(), resultinfo: std::ptr::null_mut(), fncollation: pg_sys::InvalidOid, isnull: false, nargs: 1, args: pg_sys::__IncompleteArrayField::new(), }), } }; let invoke = |val: f64| unsafe { let fc_info = &mut *fc_info.data; let args = fc_info.args.as_mut_slice(1); args[0].value = val.into_datum().unwrap(); args[0].isnull = false; let res = fn_addr(fc_info); 
f64::from_polymorphic_datum(res, fc_info.isnull, pg_sys::InvalidOid) .expect("unexpected NULL in timevector mapping function") }; map_series(&mut series, invoke); series } pub fn map_series(series: &mut Timevector_TSTZ_F64<'_>, mut func: impl FnMut(f64) -> f64) { use std::panic::AssertUnwindSafe; let points = series.points.as_owned().iter_mut(); // setjump guard around the loop to reduce the amount we have to // call it // NOTE need to be careful that there's not allocation within the // loop body so it cannot leak pg_sys::PgTryBuilder::new(AssertUnwindSafe(|| { for point in points { *point = TSPoint { ts: point.ts, val: func(point.val), } } })) .execute() } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_pipeline_map_lambda() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 
00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); let val = client.update( "SELECT (timevector(time, value) -> map($$ ($time + '1 day'i, $value * 2) $$))::TEXT FROM series", None, &[] ) .unwrap().first() .get_one::().unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-05 00:00:00+00\",val:50),\ (ts:\"2020-01-02 00:00:00+00\",val:20),\ (ts:\"2020-01-04 00:00:00+00\",val:40),\ (ts:\"2020-01-03 00:00:00+00\",val:30),\ (ts:\"2020-01-06 00:00:00+00\",val:60)\ ],null_val:[0])" ); }); } #[pg_test] fn test_pipeline_map_lambda2() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); let expected = "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 
00:00:00+00\",val:725.7),\ (ts:\"2020-01-01 00:00:00+00\",val:166.2),\ (ts:\"2020-01-03 00:00:00+00\",val:489.2),\ (ts:\"2020-01-02 00:00:00+00\",val:302.7),\ (ts:\"2020-01-05 00:00:00+00\",val:1012.2)\ ],null_val:[0])"; let val = client .update( "SELECT (timevector(time, value) \ -> map($$ ($time, $value^2 + $value * 2.3 + 43.2) $$))::TEXT \ FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), expected); let val = client .update( "SELECT (timevector(time, value) \ -> map($$ ($value^2 + $value * 2.3 + 43.2) $$))::TEXT \ FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(val.unwrap(), expected); }); } #[pg_test] fn test_pipeline_map_data() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); 
client.update( "CREATE FUNCTION x2(double precision) RETURNS DOUBLE PRECISION AS 'SELECT $1 * 2;' LANGUAGE SQL", None, &[] ).unwrap(); let val = client .update( "SELECT (timevector(time, value) -> map_data('x2'))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:50),\ (ts:\"2020-01-01 00:00:00+00\",val:20),\ (ts:\"2020-01-03 00:00:00+00\",val:40),\ (ts:\"2020-01-02 00:00:00+00\",val:30),\ (ts:\"2020-01-05 00:00:00+00\",val:60)\ ],null_val:[0])" ); }); } #[pg_test] fn test_pipeline_map_series() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); client.update( "CREATE FUNCTION jan_3_x3(timevector_tstz_f64) RETURNS 
timevector_tstz_f64 AS $$\ SELECT timevector(time, value * 3) \ FROM (SELECT (unnest($1)).*) a \ WHERE time='2020-01-03 00:00:00+00';\ $$ LANGUAGE SQL", None, &[], ).unwrap(); let val = client .update( "SELECT (timevector(time, value) -> map_series('jan_3_x3'))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:1,flags:1,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-03 00:00:00+00\",val:60)\ ],null_val:[0])" ); }); } #[pg_test] #[should_panic = "division by zero"] fn test_pipeline_map_series_failure() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); client.update( "CREATE FUNCTION always_fail(timevector_tstz_f64) RETURNS timevector_tstz_f64 AS $$ SELECT 0/0; SELECT $1; $$ LANGUAGE SQL", None, &[], ).unwrap(); client .update( "SELECT (timevector(time, value) -> map_series('always_fail'))::TEXT FROM series", None, &[] ) .unwrap().first() .get_one::().unwrap(); }); } #[pg_test] #[should_panic = " returned NULL"] fn test_pipeline_map_series_null() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); client.update( "CREATE FUNCTION always_null(timevector_tstz_f64) RETURNS timevector_tstz_f64 AS $$ SELECT NULL::timevector_tstz_f64; $$ LANGUAGE SQL", None, &[], ).unwrap(); client .update( "SELECT (timevector(time, value) -> map_series('always_null'))::TEXT FROM series", None, &[] ) .unwrap().first() .get_one::().unwrap(); }); } #[pg_test] fn test_map_io() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-04 UTC'::TIMESTAMPTZ, 25.0), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10.0), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20.0), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15.0), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30.0)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:5,flags:0,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[0])" ); client .update( "CREATE FUNCTION serier(timevector_tstz_f64) RETURNS timevector_tstz_f64 AS $$\ SELECT $1;\ $$ LANGUAGE SQL", None, &[], ) .unwrap(); client .update( "CREATE FUNCTION dater(double precision) RETURNS double precision AS $$\ SELECT $1 * 3;\ $$ LANGUAGE SQL", None, &[], ) .unwrap(); let (a, b) = client .update( "SELECT map_series('serier')::TEXT, map_data('dater')::TEXT FROM series", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); let one = "\ (\ version:1,\ num_elements:1,\ elements:[\ MapSeries(\ function:\"public.serier(public.timevector_tstz_f64)\"\ )\ ]\ )"; let two = "\ (\ version:1,\ num_elements:1,\ elements:[\ MapData(\ function:\"public.dater(double precision)\"\ )\ ]\ )"; assert_eq!((&*a.unwrap(), &*b.unwrap()), (one, two)); // FIXME this doesn't work yet let (a, b) = client .update( &format!( "SELECT \ '{one}'::UnstableTimevectorPipeline::Text, \ 
'{two}'::UnstableTimevectorPipeline::Text" ), None, &[], ) .unwrap() .first() .get_two::() .unwrap(); assert_eq!((&*a.unwrap(), &*b.unwrap()), (one, two)); }); } } ================================================ FILE: extension/src/time_vector/pipeline/sort.rs ================================================ use pgrx::*; use super::*; // TODO is (immutable, parallel_safe) correct? #[pg_extern( immutable, parallel_safe, name = "sort", schema = "toolkit_experimental" )] pub fn sort_pipeline_element<'p>() -> toolkit_experimental::UnstableTimevectorPipeline<'static> { Element::Sort {}.flatten() } pub fn sort_timevector(mut series: Timevector_TSTZ_F64<'_>) -> Timevector_TSTZ_F64<'_> { if series.is_sorted() { return series; } let (points, null_val) = if !series.has_nulls() { // easy case let mut points = std::mem::take(series.points.as_owned()); points.sort_by(|a, b| a.ts.cmp(&b.ts)); let nulls_len = points.len().div_ceil(8); (points, std::vec::from_elem(0_u8, nulls_len)) } else { let mut points: Vec<(usize, TSPoint)> = std::mem::take(series.points.as_owned()) .into_iter() .enumerate() .collect(); points.sort_by(|(_, a), (_, b)| a.ts.cmp(&b.ts)); let mut null_val = std::vec::from_elem(0_u8, points.len().div_ceil(8)); let points = points .into_iter() .enumerate() .map(|(new_idx, (old_idx, ts))| { if series.is_null_val(old_idx) { null_val[new_idx / 8] |= 1 << (new_idx % 8); } ts }) .collect(); (points, null_val) }; Timevector_TSTZ_F64Data { header: 0, version: 1, padding: [0; 3], num_points: points.len() as u32, flags: series.flags | FLAG_IS_SORTED, internal_padding: [0; 3], points: points.into(), null_val: null_val.into(), } .into() } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_pipeline_sort() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE series(time timestamptz, value double precision)", None, &[], ) .unwrap(); client .update( "INSERT INTO series \ VALUES \ ('2020-01-04 UTC'::TIMESTAMPTZ, 25), \ ('2020-01-01 UTC'::TIMESTAMPTZ, 10), \ ('2020-01-03 UTC'::TIMESTAMPTZ, 20), \ ('2020-01-02 UTC'::TIMESTAMPTZ, 15), \ ('2020-01-05 UTC'::TIMESTAMPTZ, 30), \ ('2020-01-02 12:00:00 UTC'::TIMESTAMPTZ, NULL)", None, &[], ) .unwrap(); let val = client .update( "SELECT (timevector(time, value))::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:6,flags:2,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-05 00:00:00+00\",val:30),\ (ts:\"2020-01-02 12:00:00+00\",val:NaN)\ ],null_val:[32])" ); let val = client .update( "SELECT (timevector(time, value) -> sort())::TEXT FROM series", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:6,flags:3,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-01 00:00:00+00\",val:10),\ (ts:\"2020-01-02 00:00:00+00\",val:15),\ (ts:\"2020-01-02 12:00:00+00\",val:NaN),\ (ts:\"2020-01-03 00:00:00+00\",val:20),\ (ts:\"2020-01-04 00:00:00+00\",val:25),\ (ts:\"2020-01-05 00:00:00+00\",val:30)\ ],null_val:[4])" ); }); } } ================================================ FILE: extension/src/time_vector/pipeline.rs ================================================ mod aggregation; mod arithmetic; mod delta; mod expansion; mod fill_to; mod filter; mod lambda; mod map; mod sort; use std::convert::TryInto; use pgrx::*; use super::*; use 
crate::{flatten, pg_type, ron_inout_funcs};

use fill_to::{fill_to, FillToMethod};

use delta::timevector_delta;
use sort::sort_timevector;

pub use self::toolkit_experimental::*;

use crate::serialization::PgProcId;

#[pg_schema]
pub mod toolkit_experimental {
    use super::*;
    pub use crate::time_vector::Timevector_TSTZ_F64;
    pub(crate) use lambda::toolkit_experimental::{Lambda, LambdaData};

    // TODO once we start stabilizing elements, create a type TimevectorPipeline
    // stable elements will create a stable pipeline, but adding an unstable
    // element to a stable pipeline will create an unstable pipeline
    pg_type! {
        #[derive(Debug)]
        struct UnstableTimevectorPipeline<'input> {
            num_elements: u64,
            elements: [Element<'input>; self.num_elements],
        }
    }

    // One pipeline element. The numeric tags are the on-disk discriminants
    // and must never be reused for a different meaning (see retired 2 and 3).
    flat_serialize_macro::flat_serialize! {
        #[derive(Debug)]
        #[derive(serde::Serialize, serde::Deserialize)]
        enum Element<'input> {
            kind: u64,
            LTTB: 1 {
                resolution: u64,
            },
            // 2 was for resample_to_rate
            // 3 was for fill_holes
            Sort: 4 {
            },
            Delta: 5 {
            },
            MapData: 6 {
                // FIXME serialize/deserialize as `name(type)`
                function: PgProcId,
            },
            MapSeries: 7 {
                // FIXME serialize/deserialize as `name(type)`
                function: PgProcId,
            },
            Arithmetic: 8 {
                function: arithmetic::Function,
                rhs: f64,
            },
            MapLambda: 9 {
                lambda: LambdaData<'input>,
            },
            FilterLambda: 10 {
                lambda: LambdaData<'input>,
            },
            FillTo: 11 {
                interval: i64,
                fill_method: FillToMethod,
            },
        }
    }

    impl<'input> Element<'input> {
        /// Wrap a single element into a one-element pipeline.
        pub fn flatten<'a>(self) -> UnstableTimevectorPipeline<'a> {
            // TODO it'd be nice not to have to allocate a vector here but
            // `let slice = &[self][..];`
            // gives a lifetime error I don't yet know how to solve
            let slice = vec![self].into();
            unsafe {
                flatten! {
                    UnstableTimevectorPipeline {
                        num_elements: 1,
                        elements: slice,
                    }
                }
            }
        }
    }

    // NOTE(review): the extraction stripped the generic argument of `From`
    // here; `From<Element<'e>>` is the only parameter consistent with the
    // `fn from(element: Element<'e>)` body below.
    impl<'e> From<Element<'e>> for UnstableTimevectorPipeline<'e> {
        fn from(element: Element<'e>) -> Self {
            build!
{ UnstableTimevectorPipeline { num_elements: 1, elements: vec![element].into(), } } } } ron_inout_funcs!(UnstableTimevectorPipeline<'input>); } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_run_pipeline<'a>( timevector: Timevector_TSTZ_F64<'a>, pipeline: toolkit_experimental::UnstableTimevectorPipeline<'a>, ) -> Timevector_TSTZ_F64<'static> { run_pipeline_elements(timevector, pipeline.elements.iter()).in_current_context() } pub fn run_pipeline_elements<'s, 'j, 'i>( mut timevector: Timevector_TSTZ_F64<'s>, pipeline: impl Iterator> + 'i, ) -> Timevector_TSTZ_F64<'s> { for element in pipeline { timevector = execute_pipeline_element(timevector, &element); } timevector } pub fn execute_pipeline_element<'s>( timevector: Timevector_TSTZ_F64<'s>, element: &Element, ) -> Timevector_TSTZ_F64<'s> { match element { Element::LTTB { resolution } => crate::lttb::lttb_ts(timevector, *resolution as _), Element::Sort { .. } => sort_timevector(timevector), Element::Delta { .. } => timevector_delta(&timevector), Element::MapData { function } => map::apply_to(timevector, function.0), Element::MapSeries { function } => map::apply_to_series(timevector, function.0), Element::MapLambda { lambda } => map::apply_lambda_to(timevector, lambda), Element::FilterLambda { lambda } => filter::apply_lambda_to(timevector, lambda), Element::Arithmetic { function, rhs } => arithmetic::apply(timevector, *function, *rhs), Element::FillTo { .. } => fill_to(timevector, element), } } // TODO is (immutable, parallel_safe) correct? 
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
/// `pipeline -> pipeline`: concatenate the elements of `element` onto
/// `pipeline` and fix up the element count.
pub fn arrow_add_unstable_element<'p>(
    mut pipeline: toolkit_experimental::UnstableTimevectorPipeline<'p>,
    element: toolkit_experimental::UnstableTimevectorPipeline<'p>,
) -> toolkit_experimental::UnstableTimevectorPipeline<'p> {
    pipeline.elements.as_owned().extend(element.elements.iter());
    pipeline.num_elements = pipeline.elements.len().try_into().unwrap();
    pipeline
}

/// Planner support function: constant-folds chained `->` pipeline additions
/// into a single pipeline constant (see `pipeline_support_helper`).
#[pg_extern(
    immutable,
    parallel_safe,
    schema = "toolkit_experimental",
    name = "toolkit_pipeline_support"
)]
pub unsafe fn pipeline_support(input: Internal) -> Internal {
    pipeline_support_helper(input, |old_pipeline, new_element| {
        let new_element = UnstableTimevectorPipeline::from_polymorphic_datum(
            new_element,
            false,
            pg_sys::Oid::INVALID,
        )
        .unwrap();
        arrow_add_unstable_element(old_pipeline, new_element)
            .into_datum()
            .unwrap()
    })
}

/// Shared body for pipeline support functions. Given a
/// `SupportRequestSimplify` over `(tv -> pipeline_const) -> element_const`,
/// merges the two constants via `make_new_pipeline` and rewrites the call to
/// `tv -> merged_const`, so the pipeline is built once at plan time.
pub(crate) unsafe fn pipeline_support_helper(
    input: Internal,
    make_new_pipeline: impl FnOnce(UnstableTimevectorPipeline, pg_sys::Datum) -> pg_sys::Datum,
) -> Internal {
    use std::mem::{size_of, MaybeUninit};

    let input = input.unwrap().unwrap();
    let input: *mut pg_sys::Node = input.cast_mut_ptr();
    if !pgrx::is_a(input, pg_sys::NodeTag::T_SupportRequestSimplify) {
        return no_change();
    }
    let req: *mut pg_sys::SupportRequestSimplify = input.cast();

    let final_executor = (*req).fcall;
    let original_args = PgList::from_pg((*final_executor).args);
    assert_eq!(original_args.len(), 2);
    let arg1 = original_args.head().unwrap();
    let arg2 = original_args.tail().unwrap();

    // the lhs may appear as an operator or as a plain function call
    let (executor_id, lhs_args) = if is_a(arg1, pg_sys::NodeTag::T_OpExpr) {
        let old_executor: *mut pg_sys::OpExpr = arg1.cast();
        ((*old_executor).opfuncid, (*old_executor).args)
    } else if is_a(arg1, pg_sys::NodeTag::T_FuncExpr) {
        let old_executor: *mut pg_sys::FuncExpr = arg1.cast();
        ((*old_executor).funcid, (*old_executor).args)
    } else {
        return no_change();
    };

    // check old_executor operator fn is 'run_pipeline' above
    // NOTE(review): the extraction stripped the OnceCell generic; the cell
    // caches `executor_id`, which is a `pg_sys::Oid`.
    static RUN_PIPELINE_OID: once_cell::sync::OnceCell<pg_sys::Oid> =
        once_cell::sync::OnceCell::new();
    match RUN_PIPELINE_OID.get() {
        Some(oid) => {
            if executor_id != *oid {
                return no_change();
            }
        }
        None => {
            let executor_fn = {
                let mut flinfo: pg_sys::FmgrInfo = MaybeUninit::zeroed().assume_init();
                pg_sys::fmgr_info(executor_id, &mut flinfo);
                flinfo.fn_addr
            };
            // FIXME this cast should not be necessary; pgrx is defining the
            // wrapper functions as
            // `unsafe fn(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum`
            // instead of
            // `unsafe extern "C" fn(fcinfo: pg_sys::FunctionCallInfo) -> pg_sys::Datum`
            // we'll fix this upstream
            let expected_executor = arrow_run_pipeline_wrapper as usize;
            match executor_fn {
                None => return no_change(),
                // FIXME the direct comparison should work
                Some(func) if func as usize != expected_executor => return no_change(),
                Some(_) => RUN_PIPELINE_OID.get_or_init(|| executor_id),
            };
        }
    }

    let lhs_args = PgList::from_pg(lhs_args);
    assert_eq!(lhs_args.len(), 2);
    let old_series = lhs_args.head().unwrap();
    let old_const = lhs_args.tail().unwrap();

    if !is_a(old_const, pg_sys::NodeTag::T_Const) {
        return no_change();
    }
    let old_const: *mut pg_sys::Const = old_const.cast();

    if !is_a(arg2, pg_sys::NodeTag::T_Const) {
        return no_change();
    }
    let new_element_const: *mut pg_sys::Const = arg2.cast();

    let old_pipeline = UnstableTimevectorPipeline::from_polymorphic_datum(
        (*old_const).constvalue,
        false,
        pg_sys::Oid::INVALID,
    )
    .unwrap();
    let new_pipeline = make_new_pipeline(old_pipeline, (*new_element_const).constvalue);

    // copy the rhs Const node and swap in the merged pipeline datum
    let new_const: *mut pg_sys::Const = pg_sys::palloc(size_of::<pg_sys::Const>()).cast();
    *new_const = *new_element_const;
    (*new_const).constvalue = new_pipeline;

    // copy the outer call node and re-point its args at (series, merged const)
    let new_executor: *mut pg_sys::FuncExpr =
        pg_sys::palloc(size_of::<pg_sys::FuncExpr>()).cast();
    *new_executor = *final_executor;
    let mut new_executor_args = PgList::new();
    new_executor_args.push(old_series);
    new_executor_args.push(new_const.cast());
    (*new_executor).args = new_executor_args.into_pg();

    Internal::from(Some(pg_sys::Datum::from(new_executor)))
}

// support functions are spec'd as returning NULL pointer if no simplification
// can be made
fn no_change() -> pgrx::Internal {
    // any pointer type works here: the Datum just carries a null address
    Internal::from(Some(pg_sys::Datum::from(
        std::ptr::null_mut::<pg_sys::Node>(),
    )))
}

// using this instead of pg_operator since the latter doesn't support schemas yet
// FIXME there is no CREATE OR REPLACE OPERATOR need to update post-install.rs
//       need to ensure this works with out unstable warning
extension_sql!(
    r#"
ALTER FUNCTION "arrow_run_pipeline" SUPPORT toolkit_experimental.toolkit_pipeline_support;
ALTER FUNCTION "arrow_add_unstable_element" SUPPORT toolkit_experimental.toolkit_pipeline_support;
"#,
    name = "pipe_support",
    requires = [pipeline_support],
);

// TODO is (immutable, parallel_safe) correct?
#[pg_extern(
    immutable,
    parallel_safe,
    name = "lttb",
    schema = "toolkit_experimental"
)]
/// Pipeline element constructor: `lttb(resolution)` in SQL.
pub fn lttb_pipeline_element(
    resolution: i32,
) -> toolkit_experimental::UnstableTimevectorPipeline<'static> {
    Element::LTTB {
        resolution: resolution.try_into().unwrap(),
    }
    .flatten()
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx::*;
    use pgrx_macros::pg_test;

    #[pg_test]
    fn test_pipeline_lttb() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            // using the search path trick for this test b/c the operator is
            // difficult to spot otherwise.
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); client .update( "CREATE TABLE lttb_pipe (series timevector_tstz_f64)", None, &[], ) .unwrap(); client.update( "INSERT INTO lttb_pipe \ SELECT timevector(time, val) FROM ( \ SELECT \ '2020-01-01 UTC'::TIMESTAMPTZ + make_interval(days=>(foo*10)::int) as time, \ TRUNC((10 + 5 * cos(foo))::numeric, 4) as val \ FROM generate_series(1,11,0.1) foo \ ) bar", None, &[] ).unwrap(); let val = client .update( "SELECT (series -> lttb(17))::TEXT FROM lttb_pipe", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:17,flags:1,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-11 00:00:00+00\",val:12.7015),\ (ts:\"2020-01-13 00:00:00+00\",val:11.8117),\ (ts:\"2020-01-22 00:00:00+00\",val:7.4757),\ (ts:\"2020-01-28 00:00:00+00\",val:5.4796),\ (ts:\"2020-02-03 00:00:00+00\",val:5.0626),\ (ts:\"2020-02-09 00:00:00+00\",val:6.3703),\ (ts:\"2020-02-14 00:00:00+00\",val:8.4633),\ (ts:\"2020-02-24 00:00:00+00\",val:13.1734),\ (ts:\"2020-03-01 00:00:00+00\",val:14.8008),\ (ts:\"2020-03-07 00:00:00+00\",val:14.7511),\ (ts:\"2020-03-13 00:00:00+00\",val:13.0417),\ (ts:\"2020-03-23 00:00:00+00\",val:8.3042),\ (ts:\"2020-03-29 00:00:00+00\",val:5.9445),\ (ts:\"2020-04-04 00:00:00+00\",val:5.0015),\ (ts:\"2020-04-10 00:00:00+00\",val:5.8046),\ (ts:\"2020-04-14 00:00:00+00\",val:7.195),\ (ts:\"2020-04-20 00:00:00+00\",val:10.0221)\ ],null_val:[0,0,0])" ); let val = client .update("SELECT (series -> lttb(8))::TEXT FROM lttb_pipe", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:8,flags:1,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-11 00:00:00+00\",val:12.7015),\ (ts:\"2020-01-27 00:00:00+00\",val:5.7155),\ (ts:\"2020-02-06 
00:00:00+00\",val:5.5162),\ (ts:\"2020-02-27 00:00:00+00\",val:14.1735),\ (ts:\"2020-03-09 00:00:00+00\",val:14.3469),\ (ts:\"2020-03-30 00:00:00+00\",val:5.6728),\ (ts:\"2020-04-09 00:00:00+00\",val:5.554),\ (ts:\"2020-04-20 00:00:00+00\",val:10.0221)\ ],null_val:[0])" ); let val = client .update( "SELECT (series -> lttb(8) -> lttb(8))::TEXT FROM lttb_pipe", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:8,flags:1,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-11 00:00:00+00\",val:12.7015),\ (ts:\"2020-01-27 00:00:00+00\",val:5.7155),\ (ts:\"2020-02-06 00:00:00+00\",val:5.5162),\ (ts:\"2020-02-27 00:00:00+00\",val:14.1735),\ (ts:\"2020-03-09 00:00:00+00\",val:14.3469),\ (ts:\"2020-03-30 00:00:00+00\",val:5.6728),\ (ts:\"2020-04-09 00:00:00+00\",val:5.554),\ (ts:\"2020-04-20 00:00:00+00\",val:10.0221)\ ],null_val:[0])" ); let val = client .update( "SELECT (series -> (lttb(8) -> lttb(8) -> lttb(8)))::TEXT FROM lttb_pipe", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); assert_eq!( val.unwrap(), "(version:1,num_points:8,flags:1,internal_padding:(0,0,0),points:[\ (ts:\"2020-01-11 00:00:00+00\",val:12.7015),\ (ts:\"2020-01-27 00:00:00+00\",val:5.7155),\ (ts:\"2020-02-06 00:00:00+00\",val:5.5162),\ (ts:\"2020-02-27 00:00:00+00\",val:14.1735),\ (ts:\"2020-03-09 00:00:00+00\",val:14.3469),\ (ts:\"2020-03-30 00:00:00+00\",val:5.6728),\ (ts:\"2020-04-09 00:00:00+00\",val:5.554),\ (ts:\"2020-04-20 00:00:00+00\",val:10.0221)\ ],null_val:[0])" ); }); } #[pg_test] fn test_pipeline_folding() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); // using the search path trick for this test b/c the operator is // difficult to spot otherwise. 
let sp = client .update( "SELECT format(' %s, toolkit_experimental',current_setting('search_path'))", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); client .update(&format!("SET LOCAL search_path TO {sp}"), None, &[]) .unwrap(); let output = client.update( "EXPLAIN (verbose) SELECT timevector('2021-01-01'::timestamptz, 0.1) -> round() -> abs() -> round();", None, &[] ).unwrap().nth(1) .unwrap() .get_datum_by_ordinal(1).unwrap() .value::().unwrap().unwrap(); // check that it's executing as if we had input `timevector -> (round() -> abs())` assert_eq!(output.trim(), "Output: \ arrow_run_pipeline(\ timevector('2021-01-01 00:00:00+00'::timestamp with time zone, '0.1'::double precision), \ '(version:1,num_elements:3,elements:[\ Arithmetic(function:Round,rhs:0),\ Arithmetic(function:Abs,rhs:0),\ Arithmetic(function:Round,rhs:0)\ ])'::unstabletimevectorpipeline\ )"); }); } } ================================================ FILE: extension/src/time_vector.rs ================================================ #![allow(clippy::identity_op)] // clippy gets confused by pg_type! enums use crate::pg_sys::timestamptz_to_str; use core::str::Utf8Error; use pgrx::{iter::TableIterator, *}; use std::ffi::CStr; use tera::{Context, Tera}; use crate::{ aggregate_utils::in_aggregate_context, build, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, ron_inout_funcs, }; use tspoint::TSPoint; pub use iter::Iter; use flat_serialize::*; mod iter; mod pipeline; use crate::raw::bytea; // Bit flags stored in Timevector flags pub const FLAG_IS_SORTED: u8 = 0x01; pub const FLAG_HAS_NULLS: u8 = 0x01 << 1; pg_type! 
{
    #[derive(Debug)]
    #[allow(non_camel_case_types)]
    struct Timevector_TSTZ_F64<'input> {
        num_points: u32,
        flags: u8, // extra information about the stored data
        internal_padding: [u8; 3], // required to be aligned
        points: [TSPoint; self.num_points],
        null_val: [u8; self.num_points.div_ceil(8)], // bit vector, must be last element for alignment purposes
    }
}

ron_inout_funcs!(Timevector_TSTZ_F64<'input>);

impl<'input> Timevector_TSTZ_F64<'input> {
    pub fn num_points(&self) -> usize {
        self.num_points as usize
    }

    // Gets the nth point of a timevector
    // Differs from normal vector get in that it returns a copy rather than a reference (as the point may have to be constructed)
    pub fn get(&self, index: usize) -> Option<TSPoint> {
        if index >= self.num_points() {
            return None;
        }
        Some(self.points.as_slice()[index])
    }

    #[inline]
    pub fn is_sorted(&self) -> bool {
        self.flags & FLAG_IS_SORTED != 0
    }

    #[inline]
    pub fn has_nulls(&self) -> bool {
        self.flags & FLAG_HAS_NULLS != 0
    }

    /// True when the point at `index` is flagged null in the bit vector.
    pub fn is_null_val(&self, index: usize) -> bool {
        assert!(index < self.num_points()); // should we handle this better
        let byte_id = index / 8;
        let byte_idx = index % 8;
        self.null_val.as_slice()[byte_id] & (1 << byte_idx) != 0
    }

    fn clone_owned(&self) -> Timevector_TSTZ_F64<'static> {
        Timevector_TSTZ_F64Data::clone(self).into_owned().into()
    }
}

impl<'a> Timevector_TSTZ_F64<'a> {
    pub fn iter(&self) -> Iter<'_> {
        Iter::Slice {
            iter: self.points.iter(),
        }
    }

    pub fn num_vals(&self) -> usize {
        self.num_points()
    }
}

impl<'a> IntoIterator for Timevector_TSTZ_F64<'a> {
    type Item = TSPoint;
    type IntoIter = Iter<'a>;

    fn into_iter(self) -> Self::IntoIter {
        #[allow(clippy::unnecessary_to_owned)] // Pretty sure clippy's wrong about this
        Iter::Slice {
            iter: self.points.to_owned().into_iter(),
        }
    }
}

// NOTE(review): the extraction stripped the Lazy generic; the initializer is
// `Timevector_TSTZ_F64::type_oid`, which yields a `pg_sys::Oid`.
pub static TIMEVECTOR_OID: once_cell::sync::Lazy<pg_sys::Oid> =
    once_cell::sync::Lazy::new(Timevector_TSTZ_F64::type_oid);

#[pg_extern(immutable, parallel_safe)]
pub fn unnest<'a>(
    series: Timevector_TSTZ_F64<'a>,
) -> TableIterator<'a, (name!(time,
crate::raw::TimestampTz), name!(value, f64))> { TableIterator::new( series .into_iter() .map(|points| (points.ts.into(), points.val)), ) } /// Util function to convert from *const ::std::os::raw::c_char to String /// TimestampTz -> *const c_char -> &CStr -> &str -> String pub fn timestamptz_to_string(time: pg_sys::TimestampTz) -> Result { let char_ptr = unsafe { timestamptz_to_str(time) }; let c_str = unsafe { CStr::from_ptr(char_ptr) }; c_str.to_str().map(|s| s.to_owned()) } #[pg_extern(immutable, schema = "toolkit_experimental", parallel_safe)] pub fn to_plotly<'a>(series: Timevector_TSTZ_F64<'a>) -> String { format_timevector(series,"{\"times\": {{ TIMES | json_encode() | safe }}, \"vals\": {{ VALUES | json_encode() | safe }}}".to_string()) } #[pg_extern(immutable, schema = "toolkit_experimental", parallel_safe)] pub fn to_text<'a>(series: Timevector_TSTZ_F64<'a>, format_string: String) -> String { format_timevector(series, format_string) } pub fn format_timevector<'a>(series: Timevector_TSTZ_F64<'a>, format_string: String) -> String { let mut context = Context::new(); let mut times: Vec = Vec::new(); let mut values: Vec = Vec::new(); if series.has_nulls() { for (i, point) in series.iter().enumerate() { times.push(timestamptz_to_string(point.ts).unwrap()); if series.is_null_val(i) { values.push("null".to_string()) } else { match point.val.to_string().as_ref() { "NaN" | "inf" | "-inf" | "Infinity" | "-Infinity" => { panic!("All values in the series must be finite") } x => values.push(x.to_string()), } } } } else { // optimized path if series does not have any nulls, but might have some NaNs/infinities for point in series { times.push(timestamptz_to_string(point.ts).unwrap()); match point.val.to_string().as_ref() { "NaN" | "inf" | "-inf" | "Infinity" | "-Infinity" => { panic!("All values in the series must be finite") } x => values.push(x.to_string()), } } } context.insert("TIMES", ×); context.insert("VALUES", &values); // paired timevals in the following format: 
[{\"time\": \"2020-01-01 00:00:00+00\", \"val\": 1}, {\"time\": \"2020-01-02 00:00:00+00\", \"val\": 2}, ... ] let timevals = Tera::one_off("[{% for x in TIMES %}{\"time\": \"{{ x }}\", \"val\": {{ VALUES[loop.index0] }}}{% if not loop.last %},{% endif %} {% endfor %}]", &context,false).expect("Failed to create paired template"); context.insert("TIMEVALS", &timevals); Tera::one_off(format_string.as_ref(), &context, false) .expect("Failed to create template with Tera") } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_timevector_unnest<'a>( series: Timevector_TSTZ_F64<'a>, _accessor: crate::accessors::AccessorUnnest, ) -> TableIterator<'a, (name!(time, crate::raw::TimestampTz), name!(value, f64))> { unnest(series) } #[pg_extern(immutable, parallel_safe, strict)] pub fn timevector_serialize(state: Internal) -> bytea { let state: &Timevector_TSTZ_F64 = unsafe { state.get().unwrap() }; let state: &Timevector_TSTZ_F64Data = &state.0; crate::do_serialize!(state) } #[pg_extern(strict, immutable, parallel_safe)] pub fn timevector_deserialize(bytes: bytea, _internal: Internal) -> Option { let data: Timevector_TSTZ_F64<'static> = crate::do_deserialize!(bytes, Timevector_TSTZ_F64Data); Inner::from(data).internal() } #[pg_extern(immutable, parallel_safe)] pub fn timevector_tstz_f64_trans( state: Internal, time: Option, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { unsafe { timevector_trans_inner(state.to_inner(), time, value, fcinfo).internal() } } pub fn timevector_trans_inner( state: Option>>, time: Option, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option>> { unsafe { in_aggregate_context(fcinfo, || { let time: pg_sys::TimestampTz = match time { None => return state, Some(time) => time.into(), }; let mut state = match state { None => Inner::from(build! 
{
                    // empty, sorted vector: first point keeps it trivially sorted
                    Timevector_TSTZ_F64 {
                        num_points: 0,
                        flags: FLAG_IS_SORTED,
                        internal_padding: [0; 3],
                        points: vec![].into(),
                        null_val: vec![].into(),
                    }
                }),
                Some(state) => state,
            };
            // Clear the sorted flag if this point arrives out of order
            if let Some(last_point) = state.points.as_slice().last() {
                if state.is_sorted() && last_point.ts > time {
                    state.flags ^= FLAG_IS_SORTED;
                }
            }
            // Start a fresh null-bitmap byte every 8 points
            if state.num_points % 8 == 0 {
                state.null_val.as_owned().push(0);
            }
            match value {
                None => {
                    // NULL value: store NaN and set the corresponding null bit
                    state.flags |= FLAG_HAS_NULLS;
                    state.points.as_owned().push(TSPoint {
                        ts: time,
                        val: f64::NAN,
                    });
                    let byte_idx = state.num_points % 8; // off by 1, but num_points isn't yet incremented
                    *state.null_val.as_owned().last_mut().unwrap() |= 1 << byte_idx;
                }
                Some(val) => state.points.as_owned().push(TSPoint { ts: time, val }),
            };
            state.num_points += 1;
            Some(state)
        })
    }
}

/// Aggregate transition function for `rollup(timevector_tstz_f64)`.
#[pg_extern(immutable, parallel_safe)]
pub fn timevector_tstz_f64_compound_trans<'a>(
    state: Internal,
    series: Option<Timevector_TSTZ_F64<'a>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    inner_compound_trans(unsafe { state.to_inner() }, series, fcinfo).internal()
}

pub fn inner_compound_trans<'b>(
    state: Option<Inner<Timevector_TSTZ_F64<'static>>>,
    series: Option<Timevector_TSTZ_F64<'b>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<Timevector_TSTZ_F64<'static>>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, series) {
            (None, None) => None,
            (Some(state), None) => Some(state),
            (None, Some(series)) => Some(series.clone_owned().into()),
            (Some(state), Some(series)) => {
                // TODO: this should be doable without cloning 'state'
                Some(combine(state.clone(), series.clone()).into())
            }
        })
    }
}

/// Combine function for parallel aggregation of timevectors.
#[pg_extern(immutable, parallel_safe)]
pub fn timevector_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { inner_combine(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

pub fn inner_combine<'a, 'b>(
    state1: Option<Inner<Timevector_TSTZ_F64<'a>>>,
    state2: Option<Inner<Timevector_TSTZ_F64<'b>>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<Timevector_TSTZ_F64<'static>>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state1, state2) {
            (None, None) => None,
            (None, Some(state2)) => Some(state2.clone_owned().into()),
            (Some(state1), None) => Some(state1.clone_owned().into()),
            (Some(state1), Some(state2)) => {
                Some(combine(state1.clone(), state2.clone()).into())
            }
        })
    }
}

/// Concatenate two timevectors, preserving the sorted flag and null bitmap.
pub fn combine(
    first: Timevector_TSTZ_F64<'_>,
    second: Timevector_TSTZ_F64<'_>,
) -> Timevector_TSTZ_F64<'static> {
    if first.num_vals() == 0 {
        return second.clone_owned();
    }
    if second.num_vals() == 0 {
        return first.clone_owned();
    }

    // The result stays sorted only if both inputs are sorted and `first`
    // ends no later than `second` begins.
    let is_sorted = first.is_sorted()
        && second.is_sorted()
        && first.points.as_slice().last().unwrap().ts
            <= second.points.as_slice().first().unwrap().ts;

    let points: Vec<_> = first.iter().chain(second.iter()).collect();

    let mut flags = (first.flags & FLAG_HAS_NULLS) | (second.flags & FLAG_HAS_NULLS);
    if is_sorted {
        flags |= FLAG_IS_SORTED;
    }

    let null_val = if flags & FLAG_HAS_NULLS == 0 {
        // no nulls anywhere: all-zero bitmap (vec! is the idiomatic form of
        // the former std::vec::from_elem call)
        vec![0_u8; points.len().div_ceil(8)]
    } else {
        // Extend first's bitmap, then shift second's null bits into place
        let mut v = first.null_val.as_slice().to_vec();
        v.resize(points.len().div_ceil(8), 0);
        if second.has_nulls() {
            for i in 0..second.num_points {
                if second.is_null_val(i as usize) {
                    let idx = i + first.num_points;
                    let byte_id = idx / 8;
                    let byte_idx = idx % 8;
                    v[byte_id as usize] |= 1 << byte_idx;
                }
            }
        }
        v
    };

    build!
{
        Timevector_TSTZ_F64 {
            num_points: points.len() as _,
            flags,
            internal_padding: [0; 3],
            points: points.into(),
            null_val: null_val.into(),
        }
    }
}

/// Final function for the timevector aggregates.
#[pg_extern(immutable, parallel_safe)]
pub fn timevector_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Timevector_TSTZ_F64<'static>> {
    unsafe { timevector_final_inner(state.to_inner(), fcinfo) }
}

pub fn timevector_final_inner<'a>(
    state: Option<Inner<Timevector_TSTZ_F64<'a>>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Timevector_TSTZ_F64<'a>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let state = match state {
                None => return None,
                Some(state) => state,
            };
            // copy out of the aggregate memory context
            Some(state.in_current_context())
        })
    }
}

extension_sql!(
    "\n\
    CREATE AGGREGATE timevector(ts TIMESTAMPTZ, value DOUBLE PRECISION) (\n\
        sfunc = timevector_tstz_f64_trans,\n\
        stype = internal,\n\
        finalfunc = timevector_final,\n\
        combinefunc = timevector_combine,\n\
        serialfunc = timevector_serialize,\n\
        deserialfunc = timevector_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "timevector_tstz_f64_agg",
    requires = [
        timevector_tstz_f64_trans,
        timevector_final,
        timevector_combine,
        timevector_serialize,
        timevector_deserialize
    ],
);

extension_sql!(
    "\n\
    CREATE AGGREGATE rollup(\n\
        timevector_tstz_f64\n\
    ) (\n\
        sfunc = timevector_tstz_f64_compound_trans,\n\
        stype = internal,\n\
        finalfunc = timevector_final,\n\
        combinefunc = timevector_combine,\n\
        serialfunc = timevector_serialize,\n\
        deserialfunc = timevector_deserialize,\n\
        parallel = safe\n\
    );\n\
",
    name = "timevector_tstz_f64_rollup",
    requires = [
        timevector_tstz_f64_compound_trans,
        timevector_final,
        timevector_combine,
        timevector_serialize,
        timevector_deserialize
    ],
);

#[pg_schema]
pub mod toolkit_experimental {
    use super::*;

    // Only making this available through the arrow operator right now, as the semantics are cleaner that way
    /// As-of join: for each point of `into`, pair it with the latest `from`
    /// value at or before that timestamp (None when `into` starts earlier).
    pub fn asof_join<'a, 'b>(
        from: Timevector_TSTZ_F64<'a>,
        into: Timevector_TSTZ_F64<'b>,
    ) -> TableIterator<
        'a,
        (
            name!(value1, Option<f64>),
            name!(value2, f64),
            name!(time, crate::raw::TimestampTz),
        ),
    > {
        assert!(
            from.num_points > 0 && into.num_points > 0,
            "both timevectors must be populated for an asof join"
        );
        let mut from = from
            .into_iter()
            .map(|points| (points.ts.into(), points.val))
            .peekable();
        let into = into.into_iter().map(|points| (points.ts, points.val));

        let (mut from_time, mut from_val) = from.next().unwrap();
        let mut results = vec![];
        for (into_time, into_val) in into {
            // Handle case where into starts before from
            if into_time < from_time {
                results.push((None, into_val, crate::raw::TimestampTz::from(into_time)));
                continue;
            }

            // advance `from` to the last point not after `into_time`
            while let Some((peek_time, _)) = from.peek() {
                if *peek_time > into_time {
                    break;
                }
                (from_time, from_val) = from.next().unwrap();
            }

            results.push((
                Some(from_val),
                into_val,
                crate::raw::TimestampTz::from(into_time),
            ));
        }

        TableIterator::new(results.into_iter())
    }

    pg_type! {
        #[derive(Debug)]
        struct AccessorAsof<'input> {
            into: Timevector_TSTZ_F64Data<'input>,
        }
    }

    ron_inout_funcs!(AccessorAsof<'input>);

    /// Build the `asof(tv)` accessor used with the arrow operator.
    #[pg_extern(immutable, parallel_safe, name = "asof")]
    pub fn accessor_asof<'a>(tv: Timevector_TSTZ_F64<'a>) -> AccessorAsof<'static> {
        unsafe {
            flatten!
// (continuation of `accessor_asof` — completes the `flatten!` begun above)
{ AccessorAsof { into: tv.0 } }
        }
    }
}

/// Arrow-operator (`->`) form of the asof join: `tv -> asof(other)`.
#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_timevector_asof<'a>(
    series: Timevector_TSTZ_F64<'a>,
    accessor: toolkit_experimental::AccessorAsof,
) -> TableIterator<
    'a,
    (
        name!(value1, Option<f64>),
        name!(value2, f64),
        name!(time, crate::raw::TimestampTz),
    ),
> {
    toolkit_experimental::asof_join(series, accessor.into.clone().into())
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use pgrx::*;
    use pgrx_macros::pg_test;

    #[pg_test]
    pub fn test_unnest() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update(
                    "CREATE TABLE data(time TIMESTAMPTZ, value DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    r#"INSERT INTO data VALUES ('2020-1-1', 30.0), ('2020-1-2', 45.0), ('2020-1-3', NULL), ('2020-1-4', 55.5), ('2020-1-5', 10.0)"#,
                    None,
                    &[],
                )
                .unwrap();
            let mut unnest = client
                .update(
                    "SELECT unnest(timevector(time, value))::TEXT FROM data",
                    None,
                    &[],
                )
                .unwrap();
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-01 00:00:00+00\",30)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-02 00:00:00+00\",45)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-03 00:00:00+00\",NaN)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-04 00:00:00+00\",55.5)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-05 00:00:00+00\",10)")
            );
            assert!(unnest.next().is_none());
        })
    }

    #[pg_test]
    pub fn test_format_timevector() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update(
                    "CREATE TABLE data(time TIMESTAMPTZ, value DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    r#"INSERT INTO data VALUES ('2020-1-1', 30.0), ('2020-1-2', 45.0), ('2020-1-3', NULL), ('2020-1-4', 55.5), ('2020-1-5', 10.0)"#,
                    None,
                    &[],
                )
                .unwrap();
            // NOTE(review): `::<String>` turbofishes below were restored after
            // being stripped by extraction.
            let test_plotly_template = client
                .update(
                    "SELECT toolkit_experimental.to_plotly(timevector(time, value)) FROM data",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            assert_eq!(test_plotly_template, "{\"times\": [\"2020-01-01 00:00:00+00\",\"2020-01-02 00:00:00+00\",\"2020-01-03 00:00:00+00\",\"2020-01-04 00:00:00+00\",\"2020-01-05 00:00:00+00\"], \"vals\": [\"30\",\"45\",\"null\",\"55.5\",\"10\"]}"
            );
            let test_paired_timevals_template = client.update(
                "SELECT toolkit_experimental.to_text(timevector(time, value),'{{TIMEVALS}}') FROM data",
                None, &[]
            ).unwrap().first()
            .get_one::<String>().unwrap()
            .unwrap();
            assert_eq!(
                test_paired_timevals_template,"[{\"time\": \"2020-01-01 00:00:00+00\", \"val\": 30}, {\"time\": \"2020-01-02 00:00:00+00\", \"val\": 45}, {\"time\": \"2020-01-03 00:00:00+00\", \"val\": null}, {\"time\": \"2020-01-04 00:00:00+00\", \"val\": 55.5}, {\"time\": \"2020-01-05 00:00:00+00\", \"val\": 10} ]"
            );
            let test_user_supplied_template = client
                .update(
                    "SELECT toolkit_experimental.to_text(timevector(time,value), '{\"times\": {{ TIMES }}, \"vals\": {{ VALUES }}}') FROM data",
                    None, &[]
                )
                .unwrap().first()
                .get_one::<String>().unwrap()
                .unwrap();
            assert_eq!(
                test_user_supplied_template,"{\"times\": [2020-01-01 00:00:00+00, 2020-01-02 00:00:00+00, 2020-01-03 00:00:00+00, 2020-01-04 00:00:00+00, 2020-01-05 00:00:00+00], \"vals\": [30, 45, null, 55.5, 10]}"
            );
            let test_user_supplied_json_template = client.update(
                "SELECT toolkit_experimental.to_text(timevector(time, value),'{\"times\": {{ TIMES | json_encode() | safe }}, \"vals\": {{ VALUES | json_encode() | safe }}}') FROM data",
                None, &[]
            ).unwrap().first()
            .get_one::<String>().unwrap()
            .unwrap();
            assert_eq!(
                test_user_supplied_json_template, "{\"times\": [\"2020-01-01 00:00:00+00\",\"2020-01-02 00:00:00+00\",\"2020-01-03 00:00:00+00\",\"2020-01-04 00:00:00+00\",\"2020-01-05 00:00:00+00\"], \"vals\": [\"30\",\"45\",\"null\",\"55.5\",\"10\"]}"
            );
        })
    }

    #[should_panic = "All values in the series must be finite"]
    #[pg_test]
    pub fn test_format_timevector_panics_on_infinities() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update(
                    "CREATE TABLE data(time TIMESTAMPTZ, value DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    r#"INSERT INTO data VALUES ('2020-1-1', 30.0), ('2020-1-2', 45.0), ('2020-1-3', NULL), ('2020-1-4', 55.5), ('2020-1-6', 'Infinity'), ('2020-1-5', 10.0)"#,
                    None,
                    &[],
                )
                .unwrap();
            let test_plotly_template = client
                .update(
                    "SELECT toolkit_experimental.to_plotly(timevector(time, value)) FROM data",
                    None,
                    &[],
                )
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            // unreachable: the aggregate above must panic on the infinite value
            assert_eq!(test_plotly_template,"{\"times\": [\n \"2020-01-01 00:00:00+00\",\n \"2020-01-02 00:00:00+00\",\n \"2020-01-03 00:00:00+00\",\n \"2020-01-04 00:00:00+00\",\n \"2020-01-05 00:00:00+00\"\n], \"vals\": [\n \"30\",\n \"45\",\n \"null\",\n \"55.5\",\n \"10\"\n]}"
            );
        })
    }

    #[pg_test]
    pub fn timevector_io() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update(
                    "CREATE TABLE data(time TIMESTAMPTZ, value DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    r#"INSERT INTO data VALUES ('2020-1-1', 30.0), ('2020-1-2', 45.0), ('2020-1-3', NULL), ('2020-1-4', 55.5), ('2020-1-5', 10.0)"#,
                    None,
                    &[],
                )
                .unwrap();
            let tvec = client
                .update("SELECT timevector(time,value)::TEXT FROM data", None, &[])
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            let expected = r#"(version:1,num_points:5,flags:3,internal_padding:(0,0,0),points:[(ts:"2020-01-01 00:00:00+00",val:30),(ts:"2020-01-02 00:00:00+00",val:45),(ts:"2020-01-03 00:00:00+00",val:NaN),(ts:"2020-01-04 00:00:00+00",val:55.5),(ts:"2020-01-05 00:00:00+00",val:10)],null_val:[4])"#;
            assert_eq!(tvec, expected);

            // round-trip: the textual form must parse back to the same vector
            let mut unnest = client
                .update(
                    &format!("SELECT unnest('{expected}'::timevector_tstz_f64)::TEXT"),
                    None,
                    &[],
                )
                .unwrap();
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-01 00:00:00+00\",30)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-02 00:00:00+00\",45)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-03 00:00:00+00\",NaN)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-04 00:00:00+00\",55.5)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-05 00:00:00+00\",10)")
            );
            assert!(unnest.next().is_none());
        })
    }

    #[pg_test]
    pub fn test_arrow_equivalence() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update(
                    "CREATE TABLE data(time TIMESTAMPTZ, value DOUBLE PRECISION)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    r#"INSERT INTO data VALUES ('1-1-2020', 30.0), ('1-2-2020', 45.0), ('1-3-2020', NULL), ('1-4-2020', 55.5), ('1-5-2020', 10.0)"#,
                    None,
                    &[],
                )
                .unwrap();
            let mut func = client
                .update(
                    "SELECT unnest(timevector(time, value))::TEXT FROM data",
                    None,
                    &[],
                )
                .unwrap();
            let mut op = client
                .update(
                    "SELECT (timevector(time, value) -> unnest())::TEXT FROM data",
                    None,
                    &[],
                )
                .unwrap();

            // function form and arrow form must yield identical rows
            let mut test = true;
            while test {
                match (func.next(), op.next()) {
                    (None, None) => test = false,
                    (Some(a), Some(b)) => assert_eq!(a[1].value::<&str>(), b[1].value::<&str>()),
                    _ => panic!("Arrow operator didn't contain the same number of elements as nested function"),
                };
            }
        })
    }

    #[pg_test]
    pub fn test_rollup() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update(
                    "CREATE TABLE data(time TIMESTAMPTZ, value DOUBLE PRECISION, bucket INTEGER)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    r#"INSERT INTO data VALUES ('2020-1-1', 30.0, 1), ('2020-1-2', 45.0, 1), ('2020-1-3', NULL, 2), ('2020-1-4', 55.5, 2), ('2020-1-5', 10.0, 3), ('2020-1-6', 13.0, 3), ('2020-1-7', 71.0, 4), ('2020-1-8', 0.0, 4)"#,
                    None,
                    &[],
                )
                .unwrap();
            let mut unnest = client
                .update(
                    "SELECT unnest(rollup(tvec))::TEXT FROM ( SELECT timevector(time, value) AS tvec FROM data GROUP BY bucket ORDER BY bucket ) s",
                    None,
                    &[],
                )
                .unwrap();
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-01 00:00:00+00\",30)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-02 00:00:00+00\",45)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-03 00:00:00+00\",NaN)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-04 00:00:00+00\",55.5)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-05 00:00:00+00\",10)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-06 00:00:00+00\",13)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-07 00:00:00+00\",71)")
            );
            assert_eq!(
                unnest.next().unwrap()[1].value().unwrap(),
                Some("(\"2020-01-08 00:00:00+00\",0)")
            );
            assert!(unnest.next().is_none());
        })
    }

    #[pg_test]
    fn test_rollup_preserves_nulls_flag() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client
                .update("CREATE TABLE tvecs (vector Timevector_TSTZ_F64)", None, &[])
                .unwrap();
            client
                .update(
                    "INSERT INTO tvecs SELECT timevector('2020-1-1', 20)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    "INSERT INTO tvecs SELECT timevector('2020-1-2', 30)",
                    None,
                    &[],
                )
                .unwrap();
            client
                .update(
                    "INSERT INTO tvecs SELECT timevector('2020-1-3', 15)",
                    None,
                    &[],
                )
                .unwrap();

            // no NULLs yet: flags must not carry FLAG_HAS_NULLS
            let tvec = client
                .update("SELECT rollup(vector)::TEXT FROM tvecs", None, &[])
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            let expected = r#"(version:1,num_points:3,flags:1,internal_padding:(0,0,0),points:[(ts:"2020-01-01 00:00:00+00",val:20),(ts:"2020-01-02 00:00:00+00",val:30),(ts:"2020-01-03 00:00:00+00",val:15)],null_val:[0])"#;
            assert_eq!(tvec, expected);

            client
                .update(
                    "INSERT INTO tvecs SELECT timevector('2019-1-4', NULL)",
                    None,
                    &[],
                )
                .unwrap();

            // after a NULL point: FLAG_HAS_NULLS set, sorted flag dropped
            let tvec = client
                .update("SELECT rollup(vector)::TEXT FROM tvecs", None, &[])
                .unwrap()
                .first()
                .get_one::<String>()
                .unwrap()
                .unwrap();
            let expected = r#"(version:1,num_points:4,flags:2,internal_padding:(0,0,0),points:[(ts:"2020-01-01 00:00:00+00",val:20),(ts:"2020-01-02 00:00:00+00",val:30),(ts:"2020-01-03 00:00:00+00",val:15),(ts:"2019-01-04 00:00:00+00",val:NaN)],null_val:[8])"#;
            assert_eq!(tvec, expected);
        })
    }

    #[pg_test]
    fn test_asof_join() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            let mut result = client
                .update(
                    "WITH s as ( SELECT timevector(time, value) AS v1 FROM (VALUES ('2022-10-1 1:00 UTC'::TIMESTAMPTZ, 20.0), ('2022-10-1 2:00 UTC'::TIMESTAMPTZ, 30.0), ('2022-10-1 3:00 UTC'::TIMESTAMPTZ, 40.0) ) as v(time, value)), t as ( SELECT timevector(time, value) AS v2 FROM (VALUES ('2022-10-1 0:30 UTC'::TIMESTAMPTZ, 15.0), ('2022-10-1 2:00 UTC'::TIMESTAMPTZ, 45.0), ('2022-10-1 3:30 UTC'::TIMESTAMPTZ, 60.0) ) as v(time, value)) SELECT (v1 -> toolkit_experimental.asof(v2))::TEXT FROM s, t;",
                    None,
                    &[],
                )
                .unwrap();
            assert_eq!(
                result.next().unwrap()[1].value().unwrap(),
                Some("(,15,\"2022-10-01 00:30:00+00\")")
            );
            assert_eq!(
                result.next().unwrap()[1].value().unwrap(),
                Some("(30,45,\"2022-10-01 02:00:00+00\")")
            );
            assert_eq!(
                result.next().unwrap()[1].value().unwrap(),
                Some("(40,60,\"2022-10-01 03:30:00+00\")")
            );
            assert!(result.next().is_none());
        })
    }

    #[pg_test(error = "both timevectors must be populated for an asof join")]
    fn test_asof_none() {
        Spi::connect_mut(|client| {
            client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client.update(
                "WITH s as ( SELECT timevector(now(), 0) -> toolkit_experimental.filter($$ $value != 0 $$) AS empty), t as ( SELECT timevector(time, value) AS valid FROM (VALUES ('2022-10-1 0:30 UTC'::TIMESTAMPTZ, 15.0), ('2022-10-1 2:00 UTC'::TIMESTAMPTZ, 45.0), ('2022-10-1 3:30 UTC'::TIMESTAMPTZ, 60.0) ) as v(time, value)) SELECT (valid -> toolkit_experimental.asof(empty)) FROM s, t;",
                None, &[]).unwrap();
        })
    }

    #[pg_test(error = "both timevectors must be populated for an asof join")]
    fn test_none_asof() {
        Spi::connect_mut(|client| {
client.update("SET timezone TO 'UTC'", None, &[]).unwrap();
            client.update(
                "WITH s as ( SELECT timevector(now(), 0) -> toolkit_experimental.filter($$ $value != 0 $$) AS empty), t as ( SELECT timevector(time, value) AS valid FROM (VALUES ('2022-10-1 0:30 UTC'::TIMESTAMPTZ, 15.0), ('2022-10-1 2:00 UTC'::TIMESTAMPTZ, 45.0), ('2022-10-1 3:30 UTC'::TIMESTAMPTZ, 60.0) ) as v(time, value)) SELECT (empty -> toolkit_experimental.asof(valid)) FROM s, t;",
                None, &[]).unwrap();
        })
    }
}


================================================
FILE: extension/src/time_weighted_average/accessors.rs
================================================
use pgrx::*;

use crate::time_weighted_average::DurationUnit;
use crate::{
    datum_utils::interval_to_ms,
    flatten, pg_type, ron_inout_funcs,
    time_weighted_average::{TimeWeightMethod, TimeWeightSummary, TimeWeightSummaryData},
};
use tspoint::TSPoint;

pg_type! {
    struct TimeWeightInterpolatedAverageAccessor {
        timestamp : i64,
        interval : i64,
        prev : TimeWeightSummaryData,
        pad : [u8;3],
        flags : u32,
        next : TimeWeightSummaryData,
    }
}

ron_inout_funcs!(TimeWeightInterpolatedAverageAccessor);

/// Build the `interpolated_average(start, duration, prev, next)` accessor.
///
/// `flags` records which of `prev`/`next` were supplied (bit 0 / bit 1);
/// absent summaries are stored as an all-zero placeholder.
#[pg_extern(immutable, parallel_safe, name = "interpolated_average")]
fn time_weight_interpolated_average_accessor(
    start: crate::raw::TimestampTz,
    duration: crate::raw::Interval,
    prev: default!(Option<TimeWeightSummary>, "NULL"),
    next: default!(Option<TimeWeightSummary>, "NULL"),
) -> TimeWeightInterpolatedAverageAccessor {
    // placeholder summary used when prev/next were not supplied
    fn empty_summary() -> Option<TimeWeightSummary> {
        Some(unsafe {
            flatten!(TimeWeightSummary {
                first: TSPoint { ts: 0, val: 0.0 },
                last: TSPoint { ts: 0, val: 0.0 },
                weighted_sum: 0.0,
                method: TimeWeightMethod::LOCF,
            })
        })
    }

    let flags = u32::from(prev.is_some()) + if next.is_some() { 2 } else { 0 };
    let prev = prev.or_else(empty_summary).unwrap().0;
    let next = next.or_else(empty_summary).unwrap().0;
    let interval = interval_to_ms(&start, &duration);
    crate::build! {
        TimeWeightInterpolatedAverageAccessor {
            timestamp : start.into(),
            interval,
            prev,
            pad : [0,0,0],
            flags,
            next,
        }
    }
}

pg_type! {
    #[derive(Debug)]
    struct TimeWeightInterpolatedIntegralAccessor {
        start : i64,
        interval : i64,
        prev : TimeWeightSummaryData,
        pad : [u8;3],
        unit : u32,
        flags: u64,
        next : TimeWeightSummaryData,
    }
}

ron_inout_funcs!(TimeWeightInterpolatedIntegralAccessor);

/// Build the `interpolated_integral(start, interval, prev, next, unit)`
/// accessor; `unit` is stored as the unit's length in microseconds.
#[pg_extern(immutable, parallel_safe, name = "interpolated_integral")]
fn time_weight_interpolated_integral_accessor(
    start: crate::raw::TimestampTz,
    interval: crate::raw::Interval,
    prev: default!(Option<TimeWeightSummary>, "NULL"),
    next: default!(Option<TimeWeightSummary>, "NULL"),
    unit: default!(String, "'second'"),
) -> TimeWeightInterpolatedIntegralAccessor {
    // placeholder summary used when prev/next were not supplied
    fn empty_summary() -> Option<TimeWeightSummary> {
        Some(unsafe {
            flatten!(TimeWeightSummary {
                first: TSPoint { ts: 0, val: 0.0 },
                last: TSPoint { ts: 0, val: 0.0 },
                weighted_sum: 0.0,
                method: TimeWeightMethod::LOCF,
            })
        })
    }

    let unit = match DurationUnit::from_str(&unit) {
        Some(unit) => unit.microseconds(),
        None => pgrx::error!(
            "Unrecognized duration unit: {}. Valid units are: usecond, msecond, second, minute, hour",
            unit,
        ),
    };
    let flags = u64::from(prev.is_some()) + if next.is_some() { 2 } else { 0 };
    let prev = prev.or_else(empty_summary).unwrap().0;
    let next = next.or_else(empty_summary).unwrap().0;
    let interval = interval_to_ms(&start, &interval);
    crate::build! {
        TimeWeightInterpolatedIntegralAccessor {
            start: start.into(),
            interval,
            prev,
            pad : [0,0,0],
            unit,
            flags,
            next,
        }
    }
}


================================================
FILE: extension/src/time_weighted_average.rs
================================================
#![allow(non_camel_case_types)]

use pgrx::*;
use serde::{Deserialize, Serialize};

use crate::{
    accessors::{
        AccessorAverage, AccessorFirstTime, AccessorFirstVal, AccessorIntegral, AccessorLastTime,
        AccessorLastVal,
    },
    aggregate_utils::in_aggregate_context,
    duration::DurationUnit,
    flatten,
    palloc::{Inner, Internal, InternalAsValue, ToInternal},
    pg_type, ron_inout_funcs,
};
use tspoint::TSPoint;

use time_weighted_average::{
    TimeWeightError, TimeWeightMethod, TimeWeightSummary as TimeWeightSummaryInternal,
};

use crate::raw::bytea;

mod accessors;
use accessors::{TimeWeightInterpolatedAverageAccessor, TimeWeightInterpolatedIntegralAccessor};

pg_type! {
    #[derive(Debug)]
    struct TimeWeightSummary {
        first: TSPoint,
        last: TSPoint,
        weighted_sum: f64,
        method: TimeWeightMethod,
    }
}

ron_inout_funcs!(TimeWeightSummary);

impl TimeWeightSummary {
    /// Convert the flat (on-disk) form into the crate-internal summary.
    fn internal(&self) -> TimeWeightSummaryInternal {
        TimeWeightSummaryInternal {
            method: self.method,
            first: self.first,
            last: self.last,
            w_sum: self.weighted_sum,
        }
    }

    /// Extend this summary to cover [interval_start, interval_start + interval_len)
    /// by interpolating against the neighboring buckets' summaries.
    pub(super) fn interpolate(
        &self,
        interval_start: i64,
        interval_len: i64,
        prev: Option<TimeWeightSummary>,
        next: Option<TimeWeightSummary>,
    ) -> TimeWeightSummary {
        assert!(
            interval_start <= self.first.ts,
            "Interval start ({}) must be at or before first timestamp ({})",
            interval_start,
            self.first.ts
        );
        let end = interval_start + interval_len;
        assert!(
            end > self.last.ts,
            "Interval end ({}) must be after last timestamp ({})",
            end,
            self.last.ts
        );

        let mut new_sum = self.weighted_sum;
        // extend the left edge toward interval_start using `prev`, if any
        let new_start = match prev {
            Some(prev) if interval_start < self.first.ts => {
                let new_start = self
                    .method
                    .interpolate(prev.last, Some(self.first), interval_start)
                    .expect("unable to interpolate start of interval");
                new_sum += self.method.weighted_sum(new_start, self.first);
new_start
            }
            _ => self.first,
        };
        // extend the right edge toward `end`: always possible with LOCF,
        // only with a `next` summary for Linear
        let new_end = match (self.method, next) {
            (_, Some(next)) => {
                let new_end = self
                    .method
                    .interpolate(self.last, Some(next.first), end)
                    .expect("unable to interpolate end of interval");
                new_sum += self.method.weighted_sum(self.last, new_end);
                new_end
            }
            (TimeWeightMethod::LOCF, None) => {
                let new_end = self
                    .method
                    .interpolate(self.last, None, end)
                    .expect("unable to interpolate end of interval");
                new_sum += self.method.weighted_sum(self.last, new_end);
                new_end
            }
            _ => self.last,
        };

        unsafe {
            crate::flatten!(TimeWeightSummary {
                first: new_start,
                last: new_end,
                weighted_sum: new_sum,
                method: self.method,
            })
        }
    }
}

/// In-memory aggregate transition state: raw points are buffered and only
/// folded into summaries when required (serialize/combine/final).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct TimeWeightTransState {
    #[serde(skip)]
    point_buffer: Vec<TSPoint>,
    method: TimeWeightMethod,
    summary_buffer: Vec<TimeWeightSummaryInternal>,
}

impl TimeWeightTransState {
    fn push_point(&mut self, value: TSPoint) {
        self.point_buffer.push(value);
    }

    /// Fold all buffered points into a single summary in the summary buffer.
    fn combine_points(&mut self) {
        if self.point_buffer.is_empty() {
            return;
        }
        self.point_buffer.sort_unstable_by_key(|p| p.ts);
        self.summary_buffer.push(
            TimeWeightSummaryInternal::new_from_sorted_iter(&self.point_buffer, self.method)
                .unwrap(),
        );
        self.point_buffer.clear();
    }

    fn push_summary(&mut self, other: &TimeWeightTransState) {
        let cb = other.summary_buffer.clone();
        for val in cb.into_iter() {
            self.summary_buffer.push(val);
        }
    }

    /// Merge all buffered summaries (and pending points) into one summary.
    fn combine_summaries(&mut self) {
        self.combine_points();
        if self.summary_buffer.len() <= 1 {
            return;
        }
        self.summary_buffer.sort_unstable_by_key(|s| s.first.ts);
        self.summary_buffer =
            vec![TimeWeightSummaryInternal::combine_sorted_iter(&self.summary_buffer).unwrap()];
    }
}

#[pg_extern(immutable, parallel_safe, strict)]
pub fn time_weight_trans_serialize(state: Internal) -> bytea {
    // SAFETY: `state` was produced by the time_weight transition function
    let mut state: Inner<TimeWeightTransState> = unsafe { state.to_inner().unwrap() };
    state.combine_summaries();
    crate::do_serialize!(state)
}

#[pg_extern(strict, immutable, parallel_safe)]
pub fn time_weight_trans_deserialize(bytes: bytea, _internal: Internal) -> Option<Internal> {
    time_weight_trans_deserialize_inner(bytes).internal()
}

pub fn time_weight_trans_deserialize_inner(bytes: bytea) -> Inner<TimeWeightTransState> {
    let t: TimeWeightTransState = crate::do_deserialize!(bytes, TimeWeightTransState);
    t.into()
}

// these are technically parallel_safe (as in they can be called in a parallel context) even though the aggregate itself is parallel restricted.
#[pg_extern(immutable, parallel_safe)]
pub fn time_weight_trans(
    state: Internal,
    method: String,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { time_weight_trans_inner(state.to_inner(), method, ts, val, fcinfo).internal() }
}

pub fn time_weight_trans_inner(
    state: Option<Inner<TimeWeightTransState>>,
    method: String,
    ts: Option<crate::raw::TimestampTz>,
    val: Option<f64>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<TimeWeightTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            // NULL timestamp or value contributes nothing
            let p = match (ts, val) {
                (_, None) => return state,
                (None, _) => return state,
                (Some(ts), Some(val)) => TSPoint { ts: ts.into(), val },
            };
            match state {
                None => {
                    let mut s = TimeWeightTransState {
                        point_buffer: vec![],
                        // TODO technically not portable to ASCII-compatible charsets
                        method: match method.trim().to_lowercase().as_str() {
                            "linear" | "trapezoidal" => TimeWeightMethod::Linear,
                            "locf" => TimeWeightMethod::LOCF,
                            _ => panic!("unknown method"),
                        },
                        summary_buffer: vec![],
                    };
                    s.push_point(p);
                    Some(s.into())
                }
                Some(mut s) => {
                    s.push_point(p);
                    Some(s)
                }
            }
        })
    }
}

/// Transition function for `rollup(TimeWeightSummary)`.
#[pg_extern(immutable, parallel_safe)]
pub fn time_weight_summary_trans(
    state: Internal,
    next: Option<TimeWeightSummary>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    time_weight_summary_trans_inner(unsafe { state.to_inner() }, next, fcinfo).internal()
}

pub fn time_weight_summary_trans_inner(
    state: Option<Inner<TimeWeightTransState>>,
    next: Option<TimeWeightSummary>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<TimeWeightTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || match (state, next) {
            (None, None) => None,
            (None, Some(next)) => Some(
                TimeWeightTransState {
                    summary_buffer: vec![next.internal()],
                    point_buffer: vec![],
                    method: next.method,
                }
                .into(),
            ),
            (Some(state), None) => Some(state),
            (Some(mut state), Some(next)) => {
                let next = TimeWeightTransState {
                    summary_buffer: vec![next.internal()],
                    point_buffer: vec![],
                    method: next.method,
                };
                state.push_summary(&next);
                Some(state)
            }
        })
    }
}

#[pg_extern(immutable, parallel_safe)]
pub fn time_weight_combine(
    state1: Internal,
    state2: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Internal> {
    unsafe { time_weight_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() }
}

pub fn time_weight_combine_inner(
    state1: Option<Inner<TimeWeightTransState>>,
    state2: Option<Inner<TimeWeightTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<Inner<TimeWeightTransState>> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            match (state1, state2) {
                (None, None) => None,
                (None, Some(state2)) => {
                    let mut s = state2.clone();
                    s.combine_points();
                    Some(s.into())
                }
                (Some(state1), None) => {
                    let mut s = state1.clone();
                    s.combine_points();
                    Some(s.into())
                }
                (Some(state1), Some(state2)) => {
                    let mut s1 = state1.clone(); // is there a way to avoid if it doesn't need it?
                    s1.combine_points();
                    let mut s2 = state2.clone();
                    s2.combine_points();
                    s2.push_summary(&s1);
                    Some(s2.into())
                }
            }
        })
    }
}

#[pg_extern(immutable, parallel_safe)]
fn time_weight_final(
    state: Internal,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<TimeWeightSummary> {
    time_weight_final_inner(unsafe { state.to_inner() }, fcinfo)
}

fn time_weight_final_inner(
    state: Option<Inner<TimeWeightTransState>>,
    fcinfo: pg_sys::FunctionCallInfo,
) -> Option<TimeWeightSummary> {
    unsafe {
        in_aggregate_context(fcinfo, || {
            let mut state = match state {
                None => return None,
                Some(state) => state.clone(),
            };
            state.combine_summaries();
            debug_assert!(state.summary_buffer.len() <= 1);
            state.summary_buffer.pop().map(|st| {
                flatten!(TimeWeightSummary {
                    method: st.method,
                    first: st.first,
                    last: st.last,
                    weighted_sum: st.w_sum,
                })
            })
        })
    }
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weight_first_val(sketch: TimeWeightSummary, _accessor: AccessorFirstVal) -> f64 {
    time_weight_first_val(sketch)
}

#[pg_extern(name = "first_val", strict, immutable, parallel_safe)]
fn time_weight_first_val(summary: TimeWeightSummary) -> f64 {
    summary.first.val
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weight_last_val(sketch: TimeWeightSummary, _accessor: AccessorLastVal) -> f64 {
    time_weight_last_val(sketch)
}

#[pg_extern(name = "last_val", strict, immutable, parallel_safe)]
fn time_weight_last_val(summary: TimeWeightSummary) -> f64 {
    summary.last.val
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weight_first_time(
    sketch: TimeWeightSummary,
    _accessor: AccessorFirstTime,
) -> crate::raw::TimestampTz {
    time_weight_first_time(sketch)
}

#[pg_extern(name = "first_time", strict, immutable, parallel_safe)]
fn time_weight_first_time(summary: TimeWeightSummary) -> crate::raw::TimestampTz {
    summary.first.ts.into()
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weight_last_time(
    sketch: TimeWeightSummary,
    _accessor: AccessorLastTime,
) -> crate::raw::TimestampTz {
    time_weight_last_time(sketch)
}

#[pg_extern(name = "last_time", strict, immutable, parallel_safe)]
fn time_weight_last_time(summary: TimeWeightSummary) -> crate::raw::TimestampTz {
    summary.last.ts.into()
}

extension_sql!(
    "\n\
    CREATE AGGREGATE time_weight(method text, ts timestamptz, value DOUBLE PRECISION)\n\
    (\n\
        sfunc = time_weight_trans,\n\
        stype = internal,\n\
        finalfunc = time_weight_final,\n\
        combinefunc = time_weight_combine,\n\
        serialfunc = time_weight_trans_serialize,\n\
        deserialfunc = time_weight_trans_deserialize,\n\
        parallel = restricted\n\
    );\n\
\n\
    CREATE AGGREGATE rollup(tws TimeWeightSummary)\n\
    (\n\
        sfunc = time_weight_summary_trans,\n\
        stype = internal,\n\
        finalfunc = time_weight_final,\n\
        combinefunc = time_weight_combine,\n\
        serialfunc = time_weight_trans_serialize,\n\
        deserialfunc = time_weight_trans_deserialize,\n\
        parallel = restricted\n\
    );\n\
",
    name = "time_weight_agg",
    requires = [
        time_weight_trans,
        time_weight_final,
        time_weight_combine,
        time_weight_trans_serialize,
        time_weight_trans_deserialize,
        time_weight_summary_trans
    ],
);

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weighted_average_average(
    sketch: Option<TimeWeightSummary>,
    _accessor: AccessorAverage,
) -> Option<f64> {
    time_weighted_average_average(sketch)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weighted_average_integral(
    tws: Option<TimeWeightSummary>,
    accessor: AccessorIntegral,
) -> Option<f64> {
    time_weighted_average_integral(
        tws,
        String::from_utf8_lossy(&accessor.bytes[..accessor.len as usize]).to_string(),
    )
}

#[pg_extern(immutable, parallel_safe, name = "average")]
pub fn time_weighted_average_average(tws: Option<TimeWeightSummary>) -> Option<f64> {
    match tws {
        None => None,
        Some(tws) => match tws.internal().time_weighted_average() {
            Ok(a) => Some(a),
            //without bounds, the average for a single value is undefined, but it probably shouldn't throw an error, we'll return null for now.
            Err(e) => {
                if e == TimeWeightError::ZeroDuration {
                    None
                } else {
                    Err(e).unwrap()
                }
            }
        },
    }
}

#[pg_extern(immutable, parallel_safe, name = "integral")]
pub fn time_weighted_average_integral(
    tws: Option<TimeWeightSummary>,
    unit: default!(String, "'second'"),
) -> Option<f64> {
    let unit = match DurationUnit::from_str(&unit) {
        Some(unit) => unit,
        None => pgrx::error!(
            "Unrecognized duration unit: {}. Valid units are: usecond, msecond, second, minute, hour",
            unit,
        ),
    };
    // integral is computed in microseconds, then converted to the target unit
    let integral_microsecs = tws?.internal().time_weighted_integral();
    Some(DurationUnit::Microsec.convert_unit(integral_microsecs, unit))
}

/// Shared helper for the interpolated average/integral entry points.
fn interpolate(
    tws: Option<TimeWeightSummary>,
    start: crate::raw::TimestampTz,
    duration: crate::raw::Interval,
    prev: Option<TimeWeightSummary>,
    next: Option<TimeWeightSummary>,
) -> Option<TimeWeightSummary> {
    match tws {
        None => None,
        Some(tws) => {
            let interval = crate::datum_utils::interval_to_ms(&start, &duration);
            Some(tws.interpolate(start.into(), interval, prev, next))
        }
    }
}

#[pg_extern(immutable, parallel_safe, name = "interpolated_average")]
pub fn time_weighted_average_interpolated_average(
    tws: Option<TimeWeightSummary>,
    start: crate::raw::TimestampTz,
    duration: crate::raw::Interval,
    prev: default!(Option<TimeWeightSummary>, "NULL"),
    next: default!(Option<TimeWeightSummary>, "NULL"),
) -> Option<f64> {
    let target = interpolate(tws, start, duration, prev, next);
    time_weighted_average_average(target)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weighted_average_interpolated_average(
    tws: Option<TimeWeightSummary>,
    accessor: TimeWeightInterpolatedAverageAccessor,
) -> Option<f64> {
    // accessor flags: bit 0 = prev supplied, bit 1 = next supplied
    let prev = if accessor.flags & 1 == 1 {
        Some(accessor.prev.clone().into())
    } else {
        None
    };
    let next = if accessor.flags & 2 == 2 {
        Some(accessor.next.clone().into())
    } else {
        None
    };
    time_weighted_average_interpolated_average(
        tws,
        accessor.timestamp.into(),
        accessor.interval.into(),
        prev,
        next,
    )
}

#[pg_extern(immutable, parallel_safe, name = "interpolated_integral")]
pub fn time_weighted_average_interpolated_integral(
    tws: Option<TimeWeightSummary>,
    start: crate::raw::TimestampTz,
    interval: crate::raw::Interval,
    prev: default!(Option<TimeWeightSummary>, "NULL"),
    next: default!(Option<TimeWeightSummary>, "NULL"),
    unit: default!(String, "'second'"),
) -> Option<f64> {
    let target = interpolate(tws, start, interval, prev, next);
    time_weighted_average_integral(target, unit)
}

#[pg_operator(immutable, parallel_safe)]
#[opname(->)]
pub fn arrow_time_weighted_average_interpolated_integral(
    tws: Option<TimeWeightSummary>,
    accessor: TimeWeightInterpolatedIntegralAccessor,
) -> Option<f64> {
    // accessor flags: bit 0 = prev supplied, bit 1 = next supplied
    let prev = if accessor.flags & 1 == 1 {
        Some(accessor.prev.clone().into())
    } else {
        None
    };
    let next = if accessor.flags & 2 == 2 {
        Some(accessor.next.clone().into())
    } else {
        None
    };
    // Convert from num of microseconds to DurationUnit and then to string
    // (the stored values are microsecond lengths, e.g. 1_000_000 = second)
    let unit = match accessor.unit {
        1 => DurationUnit::Microsec,
        1000 => DurationUnit::Millisec,
        1_000_000 => DurationUnit::Second,
        60_000_000 => DurationUnit::Minute,
        3_600_000_000 => DurationUnit::Hour,
        _ => todo!(), // This should never be reached, the accessor gets these numbers from microseconds() in duration.rs, which only matches on valid enum values
    }
    .to_string();
    time_weighted_average_interpolated_integral(
        tws,
        accessor.start.into(),
        accessor.interval.into(),
        prev,
        next,
        unit,
    )
}

#[cfg(any(test, feature = "pg_test"))]
#[pg_schema]
mod tests {
    use super::*;
    use pgrx_macros::pg_test;

    macro_rules! select_one {
        ($client:expr, $stmt:expr, $type:ty) => {
            $client
                .update($stmt, None, &[])
                .unwrap()
                .first()
                .get_one::<$type>()
                .unwrap()
                .unwrap()
        };
    }

    #[pg_test]
    fn test_time_weight_aggregate() {
        Spi::connect_mut(|client| {
            let stmt = "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION); SET TIME ZONE 'UTC'";
            client.update(stmt, None, &[]).unwrap();

            // add a point
            let stmt = "INSERT INTO test VALUES('2020-01-01 00:00:00+00', 10.0)";
            client.update(stmt, None, &[]).unwrap();
            let stmt = "SELECT integral(time_weight('Trapezoidal', ts, val), 'hrs') FROM test";
            assert_eq!(select_one!(client, stmt, f64), 0.0);
            let stmt = "SELECT integral(time_weight('LOCF', ts, val), 'msecond') FROM test";
            assert_eq!(select_one!(client, stmt, f64), 0.0);

            // add another point
            let stmt = "INSERT INTO test VALUES('2020-01-01 00:01:00+00', 20.0)";
            client.update(stmt, None, &[]).unwrap();

            // test basic with 2 points
            let stmt = "SELECT average(time_weight('Linear', ts, val)) FROM test";
            assert!((select_one!(client, stmt, f64) - 15.0).abs() < f64::EPSILON);
            let stmt = "SELECT average(time_weight('LOCF', ts, val)) FROM test";
assert!((select_one!(client, stmt, f64) - 10.0).abs() < f64::EPSILON); let stmt = "SELECT first_val(time_weight('LOCF', ts, val)) FROM test"; assert!((select_one!(client, stmt, f64) - 10.0).abs() < f64::EPSILON); let stmt = "SELECT last_val(time_weight('LOCF', ts, val)) FROM test"; assert!((select_one!(client, stmt, f64) - 20.0).abs() < f64::EPSILON); // arrow syntax should be the same let stmt = "SELECT time_weight('LOCF', ts, val) -> first_val() FROM test"; assert!((select_one!(client, stmt, f64) - 10.0).abs() < f64::EPSILON); let stmt = "SELECT time_weight('LOCF', ts, val) -> last_val() FROM test"; assert!((select_one!(client, stmt, f64) - 20.0).abs() < f64::EPSILON); let stmt = "SELECT first_time(time_weight('LOCF', ts, val))::text FROM test"; assert_eq!(select_one!(client, stmt, &str), "2020-01-01 00:00:00+00"); let stmt = "SELECT last_time(time_weight('LOCF', ts, val))::text FROM test"; assert_eq!(select_one!(client, stmt, &str), "2020-01-01 00:01:00+00"); // arrow syntax should be the same let stmt = "SELECT (time_weight('LOCF', ts, val) -> first_time())::text FROM test"; assert_eq!(select_one!(client, stmt, &str), "2020-01-01 00:00:00+00"); let stmt = "SELECT (time_weight('LOCF', ts, val) -> last_time())::text FROM test"; assert_eq!(select_one!(client, stmt, &str), "2020-01-01 00:01:00+00"); // more values evenly spaced let stmt = "INSERT INTO test VALUES('2020-01-01 00:02:00+00', 10.0), ('2020-01-01 00:03:00+00', 20.0), ('2020-01-01 00:04:00+00', 10.0)"; client.update(stmt, None, &[]).unwrap(); let stmt = "SELECT average(time_weight('Linear', ts, val)) FROM test"; assert!((select_one!(client, stmt, f64) - 15.0).abs() < f64::EPSILON); let stmt = "SELECT average(time_weight('LOCF', ts, val)) FROM test"; assert!((select_one!(client, stmt, f64) - 15.0).abs() < f64::EPSILON); let stmt = "SELECT integral(time_weight('Linear', ts, val), 'mins') FROM test"; assert!((select_one!(client, stmt, f64) - 60.0).abs() < f64::EPSILON); let stmt = "SELECT 
integral(time_weight('LOCF', ts, val), 'hour') FROM test"; assert!((select_one!(client, stmt, f64) - 1.0).abs() < f64::EPSILON); //non-evenly spaced values let stmt = "INSERT INTO test VALUES('2020-01-01 00:08:00+00', 30.0), ('2020-01-01 00:10:00+00', 10.0), ('2020-01-01 00:10:30+00', 20.0), ('2020-01-01 00:20:00+00', 30.0)"; client.update(stmt, None, &[]).unwrap(); let stmt = "SELECT average(time_weight('Linear', ts, val)) FROM test"; // expected =(15 +15 +15 +15 + 20*4 + 20*2 +15*.5 + 25*9.5) / 20 = 21.25 just taking the midpoints between each point and multiplying by minutes and dividing by total assert!((select_one!(client, stmt, f64) - 21.25).abs() < f64::EPSILON); let stmt = "SELECT time_weight('Linear', ts, val) \ ->average() \ FROM test"; // arrow syntax should be the same assert!((select_one!(client, stmt, f64) - 21.25).abs() < f64::EPSILON); let stmt = "SELECT integral(time_weight('Linear', ts, val), 'microseconds') FROM test"; assert!((select_one!(client, stmt, f64) - 25500000000.00).abs() < f64::EPSILON); let stmt = "SELECT time_weight('Linear', ts, val) \ ->integral('microseconds') \ FROM test"; // arrow syntax should be the same assert!((select_one!(client, stmt, f64) - 25500000000.00).abs() < f64::EPSILON); let stmt = "SELECT time_weight('Linear', ts, val) \ ->integral() \ FROM test"; assert!((select_one!(client, stmt, f64) - 25500.00).abs() < f64::EPSILON); let stmt = "SELECT average(time_weight('LOCF', ts, val)) FROM test"; // expected = (10 + 20 + 10 + 20 + 10*4 + 30*2 +10*.5 + 20*9.5) / 20 = 17.75 using last value and carrying for each point assert!((select_one!(client, stmt, f64) - 17.75).abs() < f64::EPSILON); let stmt = "SELECT integral(time_weight('LOCF', ts, val), 'milliseconds') FROM test"; assert!((select_one!(client, stmt, f64) - 21300000.0).abs() < f64::EPSILON); //make sure this works with whatever ordering we throw at it let stmt = "SELECT average(time_weight('Linear', ts, val ORDER BY random())) FROM test"; 
assert!((select_one!(client, stmt, f64) - 21.25).abs() < f64::EPSILON); let stmt = "SELECT average(time_weight('LOCF', ts, val ORDER BY random())) FROM test"; assert!((select_one!(client, stmt, f64) - 17.75).abs() < f64::EPSILON); let stmt = "SELECT integral(time_weight('Linear', ts, val ORDER BY random()), 'seconds') FROM test"; assert!((select_one!(client, stmt, f64) - 25500.0).abs() < f64::EPSILON); let stmt = "SELECT integral(time_weight('LOCF', ts, val ORDER BY random())) FROM test"; assert!((select_one!(client, stmt, f64) - 21300.0).abs() < f64::EPSILON); // make sure we get the same result if we do multi-level aggregation let stmt = "WITH t AS (SELECT date_trunc('minute', ts), time_weight('Linear', ts, val) AS tws FROM test GROUP BY 1) SELECT average(rollup(tws)) FROM t"; assert!((select_one!(client, stmt, f64) - 21.25).abs() < f64::EPSILON); let stmt = "WITH t AS (SELECT date_trunc('minute', ts), time_weight('LOCF', ts, val) AS tws FROM test GROUP BY 1) SELECT average(rollup(tws)) FROM t"; assert!((select_one!(client, stmt, f64) - 17.75).abs() < f64::EPSILON); }); } #[pg_test] fn test_time_weight_io() { Spi::connect_mut(|client| { client.update("SET timezone TO 'UTC'", None, &[]).unwrap(); let stmt = "CREATE TABLE test(ts timestamptz, val DOUBLE PRECISION)"; client.update(stmt, None, &[]).unwrap(); let linear_time_weight = "SELECT time_weight('Linear', ts, val)::TEXT FROM test"; let locf_time_weight = "SELECT time_weight('LOCF', ts, val)::TEXT FROM test"; let avg = |text: &str| format!("SELECT average('{text}'::TimeWeightSummary)"); // add a couple points let stmt = "INSERT INTO test VALUES('2020-01-01 00:00:00+00', 10.0), ('2020-01-01 00:01:00+00', 20.0)"; client.update(stmt, None, &[]).unwrap(); // test basic with 2 points let expected = "(\ version:1,\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ last:(ts:\"2020-01-01 00:01:00+00\",val:20),\ weighted_sum:900000000,\ method:Linear\ )"; assert_eq!(select_one!(client, linear_time_weight, String), 
expected); assert!((select_one!(client, &*avg(expected), f64) - 15.0).abs() < f64::EPSILON); let expected = "(\ version:1,\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ last:(ts:\"2020-01-01 00:01:00+00\",val:20),\ weighted_sum:600000000,\ method:LOCF\ )"; assert_eq!(select_one!(client, locf_time_weight, String), expected); assert!((select_one!(client, &*avg(expected), f64) - 10.0).abs() < f64::EPSILON); // more values evenly spaced let stmt = "INSERT INTO test VALUES('2020-01-01 00:02:00+00', 10.0), ('2020-01-01 00:03:00+00', 20.0), ('2020-01-01 00:04:00+00', 10.0)"; client.update(stmt, None, &[]).unwrap(); let expected = "(\ version:1,\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ last:(ts:\"2020-01-01 00:04:00+00\",val:10),\ weighted_sum:3600000000,\ method:Linear\ )"; assert_eq!(select_one!(client, linear_time_weight, String), expected); assert!((select_one!(client, &*avg(expected), f64) - 15.0).abs() < f64::EPSILON); let expected = "(\ version:1,\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ last:(ts:\"2020-01-01 00:04:00+00\",val:10),\ weighted_sum:3600000000,\ method:LOCF\ )"; assert_eq!(select_one!(client, locf_time_weight, String), expected); assert!((select_one!(client, &*avg(expected), f64) - 15.0).abs() < f64::EPSILON); //non-evenly spaced values let stmt = "INSERT INTO test VALUES('2020-01-01 00:08:00+00', 30.0), ('2020-01-01 00:10:00+00', 10.0), ('2020-01-01 00:10:30+00', 20.0), ('2020-01-01 00:20:00+00', 30.0)"; client.update(stmt, None, &[]).unwrap(); let expected = "(\ version:1,\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ last:(ts:\"2020-01-01 00:20:00+00\",val:30),\ weighted_sum:25500000000,\ method:Linear\ )"; assert_eq!(select_one!(client, linear_time_weight, String), expected); assert!((select_one!(client, &*avg(expected), f64) - 21.25).abs() < f64::EPSILON); let expected = "(\ version:1,\ first:(ts:\"2020-01-01 00:00:00+00\",val:10),\ last:(ts:\"2020-01-01 00:20:00+00\",val:30),\ weighted_sum:21300000000,\ method:LOCF\ )"; 
assert_eq!(select_one!(client, locf_time_weight, String), expected); assert!((select_one!(client, &*avg(expected), f64) - 17.75).abs() < f64::EPSILON); }); } #[pg_test] fn test_time_weight_byte_io() { unsafe { use std::ptr; const BASE: i64 = 631152000000000; const MIN: i64 = 60000000; let state = time_weight_trans_inner( None, "linear".to_string(), Some(BASE.into()), Some(10.0), ptr::null_mut(), ); let state = time_weight_trans_inner( state, "linear".to_string(), Some((BASE + MIN).into()), Some(20.0), ptr::null_mut(), ); let state = time_weight_trans_inner( state, "linear".to_string(), Some((BASE + 2 * MIN).into()), Some(30.0), ptr::null_mut(), ); let state = time_weight_trans_inner( state, "linear".to_string(), Some((BASE + 3 * MIN).into()), Some(10.0), ptr::null_mut(), ); let state = time_weight_trans_inner( state, "linear".to_string(), Some((BASE + 4 * MIN).into()), Some(20.0), ptr::null_mut(), ); let state = time_weight_trans_inner( state, "linear".to_string(), Some((BASE + 5 * MIN).into()), Some(30.0), ptr::null_mut(), ); let mut control = state.unwrap(); let buffer = time_weight_trans_serialize(Inner::from(control.clone()).internal().unwrap()); let buffer = pgrx::varlena::varlena_to_byte_slice(buffer.0.cast_mut_ptr()); let expected = [ 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 96, 194, 134, 7, 62, 2, 0, 0, 0, 0, 0, 0, 0, 36, 64, 0, 3, 164, 152, 7, 62, 2, 0, 0, 0, 0, 0, 0, 0, 62, 64, 0, 0, 0, 192, 11, 90, 246, 65, ]; assert_eq!(buffer, expected); let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected); let new_state = time_weight_trans_deserialize_inner(bytea(pg_sys::Datum::from(expected.as_ptr()))); control.combine_summaries(); // Serialized form is always combined assert_eq!(&*new_state, &*control); } } #[pg_test] fn test_time_weight_interpolation() { Spi::connect_mut(|client| { client.update( "CREATE TABLE test(time timestamptz, value double precision, bucket timestamptz)", None, &[] ).unwrap(); client .update( r#"INSERT INTO test 
VALUES ('2020-1-1 8:00'::timestamptz, 10.0, '2020-1-1'::timestamptz), ('2020-1-1 12:00'::timestamptz, 40.0, '2020-1-1'::timestamptz), ('2020-1-1 16:00'::timestamptz, 20.0, '2020-1-1'::timestamptz), ('2020-1-2 2:00'::timestamptz, 15.0, '2020-1-2'::timestamptz), ('2020-1-2 12:00'::timestamptz, 50.0, '2020-1-2'::timestamptz), ('2020-1-2 20:00'::timestamptz, 25.0, '2020-1-2'::timestamptz), ('2020-1-3 10:00'::timestamptz, 30.0, '2020-1-3'::timestamptz), ('2020-1-3 12:00'::timestamptz, 0.0, '2020-1-3'::timestamptz), ('2020-1-3 16:00'::timestamptz, 35.0, '2020-1-3'::timestamptz)"#, None, &[], ) .unwrap(); let mut averages = client .update( r#"SELECT interpolated_average( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, time_weight('LOCF', time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); // test arrow version let mut arrow_averages = client .update( r#"SELECT agg -> interpolated_average( bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, time_weight('LOCF', time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); let mut integrals = client .update( r#"SELECT interpolated_integral( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket), 'hours' ) FROM ( SELECT bucket, time_weight('LOCF', time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); // verify that default value works client .update( r#"SELECT interpolated_integral( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket) ) FROM ( SELECT bucket, time_weight('LOCF', time, value) as agg FROM test GROUP BY bucket ) s ORDER BY bucket"#, None, &[], ) .unwrap(); // Day 1, 4 hours @ 10, 4 @ 40, 8 @ 20 let result = averages.next().unwrap()[1].value().unwrap(); assert_eq!(result, Some((4. 
* 10. + 4. * 40. + 8. * 20.) / 16.));
            assert_eq!(result, arrow_averages.next().unwrap()[1].value().unwrap());
            assert_eq!(
                integrals.next().unwrap()[1].value().unwrap(),
                Some(4. * 10. + 4. * 40. + 8. * 20.)
            );

            // Day 2, 2 hours @ 20, 10 @ 15, 8 @ 50, 4 @ 25
            let result = averages.next().unwrap()[1].value().unwrap();
            assert_eq!(
                result,
                Some((2. * 20. + 10. * 15. + 8. * 50. + 4. * 25.) / 24.)
            );
            assert_eq!(result, arrow_averages.next().unwrap()[1].value().unwrap());
            assert_eq!(
                integrals.next().unwrap()[1].value().unwrap(),
                Some(2. * 20. + 10. * 15. + 8. * 50. + 4. * 25.)
            );

            // Day 3, 10 hours @ 25, 2 @ 30, 4 @ 0, 8 @ 35
            let result = averages.next().unwrap()[1].value().unwrap();
            assert_eq!(result, Some((10. * 25. + 2. * 30. + 8. * 35.) / 24.));
            assert_eq!(result, arrow_averages.next().unwrap()[1].value().unwrap());
            assert_eq!(
                integrals.next().unwrap()[1].value().unwrap(),
                Some(10. * 25. + 2. * 30. + 8. * 35.)
            );

            // All three result sets are exhausted after the three buckets.
            assert!(averages.next().is_none());
            assert!(arrow_averages.next().is_none());
            assert!(integrals.next().is_none());
        });
    }

    // A LOCF summary with a single point should still interpolate across the
    // bucket (carrying the single value forward), both via the plain function
    // and via the arrow-operator form.
    #[pg_test]
    fn test_locf_interpolation_to_null() {
        Spi::connect_mut(|client| {
            let stmt = "SELECT interpolated_average(time_weight('locf', '2020-01-01 20:00:00+00', 100), '2020-01-01 00:00:00+00', '1d')";
            assert_eq!(select_one!(client, stmt, f64), 100.0);
            let stmt = "SELECT time_weight('locf', '2020-01-01 20:00:00+00', 100) -> interpolated_integral('2020-01-01 00:00:00+00', '1d')";
            assert_eq!(select_one!(client, stmt, f64), 1440000.0);
        });
    }
}

================================================
FILE: extension/src/type_builder.rs
================================================

/// Tracks whether a pg_type value already has a flattened, palloc'd byte
/// representation it can hand back as a Datum without re-serializing.
#[derive(Copy, Clone, Debug, serde::Serialize)]
pub enum CachedDatum<'r> {
    None,
    // Bytes borrowed straight from the input Datum.
    FromInput(&'r [u8]),
    // Bytes produced by flattening into the current memory context.
    Flattened(&'r [u8]),
}

// The cache is incidental state, never part of a value's identity, so any two
// cache states compare equal.
impl PartialEq for CachedDatum<'_> {
    fn eq(&self, _: &Self) -> bool {
        true
    }
}

// XXX Required by [`pgrx::PostgresType`] for default [`pgrx::FromDatum`]
// implementation but isn't used since we implement [`pgrx::FromDatum`]
// ourselves. We need a custom implementation because with the default one the
// compiler complains that `'input` and `'de` lifetimes are incompatible.
// NOTE(review): the `<D>` generic and `Result<Self, D::Error>` return type
// were stripped by the text extraction and have been reconstructed here.
impl<'de> serde::Deserialize<'de> for CachedDatum<'_> {
    fn deserialize<D>(_deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        unimplemented!();
    }
}

// Routes to pg_type_impl! for lifetime types, pg_type_no_lifetime_impl! for non-lifetime types
#[macro_export]
macro_rules! pg_type {
    // BASE CASE: Struct WITH lifetimes → route to pg_type_impl!
    // (fields have already been accumulated into the `%(...)` list)
    (
        $(#[$attrs: meta])*
        struct $name: ident<$($inlife: lifetime),+> {
            $(,)?
        }
        $(%($($vals:tt)*))?
    ) => {
        $crate::pg_type_impl!{
            'input
            $(#[$attrs])*
            struct $name<$($inlife),+> {
                $($($vals)*)?
            }
        }
    };
    // BASE CASE: Struct WITHOUT lifetimes → route to pg_type_no_lifetime_impl!
    (
        $(#[$attrs: meta])*
        struct $name: ident {
            $(,)?
        }
        $(%($($vals:tt)*))?
    ) => {
        $crate::pg_type_no_lifetime_impl!{
            $(#[$attrs])*
            struct $name {
                $($($vals)*)?
            }
        }
    };
    // FIELD PROCESSING: Struct WITH lifetimes
    // Recurses into this very same macro, moving one field per step from the
    // struct body into the accumulated `%(...)` list.
    (
        $(#[$attrs: meta])*
        struct $name: ident<$($inlife: lifetime),+> {
            $(#[$fattrs: meta])* $field:ident : $typ: tt $(<$life:lifetime>)?,
            $($tail: tt)*
        }
        $(%($($vals:tt)*))?
    ) => {
        $crate::pg_type!{
            $(#[$attrs])*
            struct $name<$($inlife),+> {
                $($tail)*
            }
            %(
                $($($vals)*)?
                $(#[$fattrs])*
                $field : $typ $(<$life>)? ,
            )
        }
    };
    // FIELD PROCESSING: Struct WITHOUT lifetimes
    // Recurses into this very same macro
    (
        $(#[$attrs: meta])*
        struct $name: ident {
            $(#[$fattrs: meta])* $field:ident : $typ: ty,
            $($tail: tt)*
        }
        $(%($($vals:tt)*))?
    ) => {
        $crate::pg_type!{
            $(#[$attrs])*
            struct $name {
                $($tail)*
            }
            %(
                $($($vals)*)?
                $(#[$fattrs])*
                $field : $typ ,
            )
        }
    };
}

#[macro_export]
macro_rules! pg_type_impl {
    (
        $lifetemplate: lifetime
        $(#[$attrs: meta])*
        struct $name: ident $(<$inlife: lifetime>)? {
            $($(#[$fattrs: meta])* $field:ident : $typ: tt $(<$life:lifetime>)?),*
            $(,)?
        }
    ) => {
        ::paste::paste!
{ // This is where the main difference between the lifetime and no-lifetime versions is // that the generated struct DOES NOT derive form PostgresType. This is because the // lifetime support in the derive macro is fundamentally broken because some generated // functions don't have an input parameter where the lifetime parameter can be bound to. $(#[$attrs])* #[derive(Clone, serde::Serialize, serde::Deserialize)] pub struct $name<$lifetemplate>(pub [<$name Data>] $(<$inlife>)?, $crate::type_builder::CachedDatum<$lifetemplate>); // Manual PostgresType implementation - the derive macro can't handle lifetimes in pgrx 0.16.0 impl<$lifetemplate> ::pgrx::datum::PostgresType for $name<$lifetemplate> {} unsafe impl<$lifetemplate> ::pgrx_sql_entity_graph::metadata::SqlTranslatable for $name<$lifetemplate> { fn argument_sql() -> core::result::Result<::pgrx_sql_entity_graph::metadata::SqlMapping, ::pgrx_sql_entity_graph::metadata::ArgumentError> { Ok(::pgrx_sql_entity_graph::metadata::SqlMapping::As(String::from(stringify!($name)))) } fn return_sql() -> core::result::Result<::pgrx_sql_entity_graph::metadata::Returns, ::pgrx_sql_entity_graph::metadata::ReturnsError> { Ok(::pgrx_sql_entity_graph::metadata::Returns::One(::pgrx_sql_entity_graph::metadata::SqlMapping::As(String::from(stringify!($name))))) } } ::paste::paste! 
{ #[unsafe(no_mangle)] #[doc(hidden)] #[allow(nonstandard_style, unknown_lints, clippy::no_mangle_with_rust_abi)] pub extern "Rust" fn [<__pgrx_internals_type_ $name>]() -> ::pgrx_sql_entity_graph::SqlGraphEntity { extern crate alloc; use alloc::string::ToString; use ::pgrx::datum::WithTypeIds; let mut mappings = Default::default(); <$name<'_> as ::pgrx::datum::WithTypeIds>::register_with_refs( &mut mappings, stringify!($name).to_string() ); ::pgrx::datum::WithSizedTypeIds::<$name<'_>>::register_sized_with_refs( &mut mappings, stringify!($name).to_string() ); ::pgrx::datum::WithArrayTypeIds::<$name<'_>>::register_array_with_refs( &mut mappings, stringify!($name).to_string() ); ::pgrx::datum::WithVarlenaTypeIds::<$name<'_>>::register_varlena_with_refs( &mut mappings, stringify!($name).to_string() ); let submission = ::pgrx_sql_entity_graph::PostgresTypeEntity { name: stringify!($name), file: file!(), line: line!(), module_path: module_path!(), full_path: core::any::type_name::<$name<'_>>(), mappings: mappings.into_iter().collect(), in_fn: stringify!([<$name:lower _in>]), in_fn_module_path: module_path!().to_string(), out_fn: stringify!([<$name:lower _out>]), out_fn_module_path: module_path!().to_string(), receive_fn: None, receive_fn_module_path: None, send_fn: None, send_fn_module_path: None, to_sql_config: ::pgrx::pgrx_sql_entity_graph::ToSqlConfigEntity { enabled: true, callback: None, content: None, }, alignment: None, }; ::pgrx_sql_entity_graph::SqlGraphEntity::Type(submission) } } #[doc(hidden)] #[::pgrx::pgrx_macros::pg_extern(immutable, parallel_safe)] pub fn [<$name:lower _in>](input: Option<&::core::ffi::CStr>) -> Option<$name<'static>> { input.map_or_else(|| { if let Some(m) = <$name<'static> as ::pgrx::inoutfuncs::InOutFuncs>::NULL_ERROR_MESSAGE { ::pgrx::pg_sys::error!("{m}"); } None }, |i| Some(<$name<'static> as ::pgrx::inoutfuncs::InOutFuncs>::input(i))) } #[doc(hidden)] #[::pgrx::pgrx_macros::pg_extern(immutable, parallel_safe)] pub fn [<$name:lower 
_out>](input: $name<'static>) -> ::pgrx::ffi::CString { let mut buffer = ::pgrx::stringinfo::StringInfo::new(); ::pgrx::inoutfuncs::InOutFuncs::output(&input, &mut buffer); // SAFETY: We just constructed this StringInfo ourselves unsafe { buffer.leak_cstr().to_owned() } } flat_serialize_macro::flat_serialize! { $(#[$attrs])* #[derive(serde::Serialize, serde::Deserialize)] struct [<$name Data>] $(<$inlife>)? { #[serde(skip, default="crate::serialization::serde_reference_adaptor::default_header")] header: u32, version: u8, #[serde(skip, default="crate::serialization::serde_reference_adaptor::default_padding")] padding: [u8; 3], $($(#[$fattrs])* $field: $typ $(<$life>)?),* } } impl<'input> $name<'input> { pub fn in_current_context<'foo>(&self) -> $name<'foo> { unsafe { self.0.flatten() } } #[allow(clippy::missing_safety_doc)] pub unsafe fn cached_datum_or_flatten(&mut self) -> pgrx::pg_sys::Datum { use $crate::type_builder::CachedDatum::*; match self.1 { None => { *self = unsafe { self.0.flatten() }; unsafe { self.cached_datum_or_flatten() } }, FromInput(bytes) | Flattened(bytes) => pg_sys::Datum::from(bytes.as_ptr()), } } } impl<$lifetemplate> [<$name Data>] $(<$inlife>)? { #[allow(clippy::missing_safety_doc)] pub unsafe fn flatten<'any>(&self) -> $name<'any> { use flat_serialize::FlatSerializable as _; use $crate::type_builder::CachedDatum::Flattened; // if we already have a CachedDatum::Flattened can just // return it without re-flattening? // TODO this needs extensive testing before we enable it // XXX this will not work if the lifetime of the memory // context the value was previously flattened into is // wrong; this may be bad enough that we should never // enable it by default... 
// if let Flattened(bytes) = self.1 { // let bytes = extend_lifetime(bytes); // let wrapped = [<$name Data>]::try_ref(bytes).unwrap().0; // $name(wrapped, Flattened(bytes)) // return self // } let bytes: &'static [u8] = self.to_pg_bytes(); let wrapped = [<$name Data>]::try_ref(bytes).unwrap().0; $name(wrapped, Flattened(bytes)) } pub fn to_pg_bytes(&self) -> &'static [u8] { use std::{mem::MaybeUninit, slice}; use flat_serialize::FlatSerializable as _; unsafe { let len = self.num_bytes(); // valena types have a maximum size if len > 0x3FFFFFFF { pgrx::error!("size {} bytes is to large", len) } let memory: *mut MaybeUninit = pg_sys::palloc0(len).cast(); let slice = slice::from_raw_parts_mut(memory, len); let rem = self.fill_slice(slice); debug_assert_eq!(rem.len(), 0); ::pgrx::set_varsize_4b(memory.cast(), len as i32); slice::from_raw_parts(memory.cast(), len) } } } impl<$lifetemplate> pgrx::FromDatum for $name<$lifetemplate> { unsafe fn from_polymorphic_datum(datum: pgrx::pg_sys::Datum, is_null: bool, _: pg_sys::Oid) -> Option where Self: Sized, { use flat_serialize::FlatSerializable as _; if is_null { return None; } let mut ptr = pg_sys::pg_detoast_datum_packed(datum.cast_mut_ptr()); //TODO is there a better way to do this? if pgrx::varatt_is_1b(ptr) { ptr = pg_sys::pg_detoast_datum_copy(ptr); } let data_len = pgrx::varsize_any(ptr); // NOTE: varlena types are aligned according to the `ALIGNMENT` with which they // are configured in CREATE TYPE. We have (historically) not configured the // `ALIGNMENT` of our types, resulting in them having the default (for varlena) // 4-byte alignment. // Some types can only be safely deserialized by `flat_serialize::try_ref` if // they are 8-byte aligned (structs which contain slices of 8-byte aligned data, // because of `flat_serialize::try_ref`'s usage of `slice::from_raw_parts`). // // To correct for this, when the data is not aligned, we copy it into a new, // aligned, memory location. 
// XXX: Technically, we're going to copy more frequently than strictly necessary // because `flat_serialize::try_ref` _can_ safely deserialize types which are // not 8-byte aligned, but contain fields which require 8-byte alignment (as // long as they're not slices) because it uses `ptr::read_unaligned`. let is_aligned = ptr.cast::<$name>().is_aligned(); let bytes = if !is_aligned { let unaligned_bytes = std::slice::from_raw_parts(ptr as *mut u8, data_len); let new_bytes = pgrx::pg_sys::palloc0(data_len); // Note: we assume that fresh allocations are 8-byte aligned debug_assert!(new_bytes.cast::<$name>().is_aligned()); let new_slice: &mut [u8] = std::slice::from_raw_parts_mut(new_bytes.cast(), data_len); new_slice.copy_from_slice(unaligned_bytes); new_slice } else { std::slice::from_raw_parts(ptr as *mut u8, data_len) }; let (data, _) = match [<$name Data>]::try_ref(bytes) { Ok(wrapped) => wrapped, Err(e) => error!(concat!("invalid ", stringify!($name), " {:?}, got len {}"), e, bytes.len()), }; $name(data, $crate::type_builder::CachedDatum::FromInput(bytes)).into() } } impl<$lifetemplate> pgrx::IntoDatum for $name<$lifetemplate> { fn into_datum(self) -> Option { use $crate::type_builder::CachedDatum::*; let datum = match self.1 { Flattened(bytes) => pg_sys::Datum::from(bytes.as_ptr()), FromInput(..) 
| None => pg_sys::Datum::from(self.0.to_pg_bytes().as_ptr()), }; Some(datum) } fn type_oid() -> pg_sys::Oid { rust_regtypein::() } } unsafe impl<$lifetemplate> ::pgrx::callconv::BoxRet for $name<$lifetemplate> { unsafe fn box_into<'fcx>( self, fcinfo: &mut ::pgrx::callconv::FcInfo<'fcx>, ) -> ::pgrx::datum::Datum<'fcx> { match ::pgrx::datum::IntoDatum::into_datum(self) { None => fcinfo.return_null(), Some(datum) => unsafe { fcinfo.return_raw_datum(datum) } } } } unsafe impl<'fcx, $lifetemplate> callconv::ArgAbi<'fcx> for $name<$lifetemplate> where Self: 'fcx, { unsafe fn unbox_arg_unchecked(arg: callconv::Arg<'_, 'fcx>) -> Self { let index = arg.index(); unsafe { arg.unbox_arg_using_from_datum().unwrap_or_else(|| panic!("argument {index} must not be null")) } } unsafe fn unbox_nullable_arg(arg: callconv::Arg<'_, 'fcx>) -> nullable::Nullable { unsafe { arg.unbox_arg_using_from_datum().into() } } } impl<$lifetemplate> ::std::ops::Deref for $name <$lifetemplate> { type Target=[<$name Data>] $(<$inlife>)?; fn deref(&self) -> &Self::Target { &self.0 } } impl<$lifetemplate> ::std::ops::DerefMut for $name <$lifetemplate> { fn deref_mut(&mut self) -> &mut Self::Target { self.1 = $crate::type_builder::CachedDatum::None; &mut self.0 } } impl<$lifetemplate> From<[<$name Data>]$(<$inlife>)?> for $name<$lifetemplate> { fn from(inner: [<$name Data>]$(<$inlife>)?) -> Self { Self(inner, $crate::type_builder::CachedDatum::None) } } impl<$lifetemplate> From<[<$name Data>]$(<$inlife>)?> for Option<$name<$lifetemplate>> { fn from(inner: [<$name Data>]$(<$inlife>)?) -> Self { Some($name(inner, $crate::type_builder::CachedDatum::None)) } } } } } #[macro_export] macro_rules! pg_type_no_lifetime_impl { ( $(#[$attrs: meta])* struct $name: ident { $($(#[$fattrs: meta])* $field:ident : $typ: ty),* $(,)? } ) => { ::paste::paste! 
{ $(#[$attrs])* #[derive(pgrx::PostgresType, Clone, serde::Serialize, serde::Deserialize)] #[inoutfuncs] #[bikeshed_postgres_type_manually_impl_from_into_datum] pub struct $name(pub [<$name Data>], $crate::type_builder::CachedDatum<'static>); flat_serialize_macro::flat_serialize! { $(#[$attrs])* #[derive(serde::Serialize, serde::Deserialize)] struct [<$name Data>] { #[serde(skip, default="crate::serialization::serde_reference_adaptor::default_header")] header: u32, version: u8, #[serde(skip, default="crate::serialization::serde_reference_adaptor::default_padding")] padding: [u8; 3], $($(#[$fattrs])* $field: $typ),* } } impl $name { pub fn in_current_context(&self) -> $name { unsafe { self.0.flatten() } } #[allow(clippy::missing_safety_doc)] pub unsafe fn cached_datum_or_flatten(&mut self) -> pgrx::pg_sys::Datum { use $crate::type_builder::CachedDatum::*; match self.1 { None => { *self = unsafe { self.0.flatten() }; self.cached_datum_or_flatten() }, FromInput(bytes) | Flattened(bytes) => pg_sys::Datum::from(bytes.as_ptr()), } } } impl [<$name Data>] { #[allow(clippy::missing_safety_doc)] pub unsafe fn flatten(&self) -> $name { use flat_serialize::FlatSerializable as _; use $crate::type_builder::CachedDatum::Flattened; let bytes: &'static [u8] = self.to_pg_bytes(); let wrapped = [<$name Data>]::try_ref(bytes).unwrap().0; $name(wrapped, Flattened(bytes)) } pub fn to_pg_bytes(&self) -> &'static [u8] { use std::{mem::MaybeUninit, slice}; use flat_serialize::FlatSerializable as _; unsafe { let len = self.num_bytes(); // valena types have a maximum size if len > 0x3FFFFFFF { pgrx::error!("size {} bytes is to large", len) } let memory: *mut MaybeUninit = pg_sys::palloc0(len).cast(); let slice = slice::from_raw_parts_mut(memory, len); let rem = self.fill_slice(slice); debug_assert_eq!(rem.len(), 0); ::pgrx::set_varsize_4b(memory.cast(), len as i32); slice::from_raw_parts(memory.cast(), len) } } } impl pgrx::FromDatum for $name { unsafe fn from_polymorphic_datum(datum: 
pgrx::pg_sys::Datum, is_null: bool, _: pg_sys::Oid) -> Option where Self: Sized, { use flat_serialize::FlatSerializable as _; if is_null { return None; } let mut ptr = pg_sys::pg_detoast_datum_packed(datum.cast_mut_ptr()); //TODO is there a better way to do this? if pgrx::varatt_is_1b(ptr) { ptr = pg_sys::pg_detoast_datum_copy(ptr); } let data_len = pgrx::varsize_any(ptr); let is_aligned = ptr.cast::<$name>().is_aligned(); let bytes = if !is_aligned { let unaligned_bytes = std::slice::from_raw_parts(ptr as *mut u8, data_len); let new_bytes = pgrx::pg_sys::palloc0(data_len); debug_assert!(new_bytes.cast::<$name>().is_aligned()); let new_slice: &mut [u8] = std::slice::from_raw_parts_mut(new_bytes.cast(), data_len); new_slice.copy_from_slice(unaligned_bytes); new_slice } else { std::slice::from_raw_parts(ptr as *mut u8, data_len) }; let (data, _) = match [<$name Data>]::try_ref(bytes) { Ok(wrapped) => wrapped, Err(e) => error!(concat!("invalid ", stringify!($name), " {:?}, got len {}"), e, bytes.len()), }; $name(data, $crate::type_builder::CachedDatum::FromInput(bytes)).into() } } impl pgrx::IntoDatum for $name { fn into_datum(self) -> Option { use $crate::type_builder::CachedDatum::*; let datum = match self.1 { Flattened(bytes) => pg_sys::Datum::from(bytes.as_ptr()), FromInput(..) 
| None => pg_sys::Datum::from(self.0.to_pg_bytes().as_ptr()), }; Some(datum) } fn type_oid() -> pg_sys::Oid { rust_regtypein::() } } unsafe impl ::pgrx::callconv::BoxRet for $name { unsafe fn box_into<'fcx>( self, fcinfo: &mut ::pgrx::callconv::FcInfo<'fcx>, ) -> ::pgrx::datum::Datum<'fcx> { match ::pgrx::datum::IntoDatum::into_datum(self) { None => fcinfo.return_null(), Some(datum) => unsafe { fcinfo.return_raw_datum(datum) } } } } unsafe impl<'fcx> callconv::ArgAbi<'fcx> for $name where Self: 'fcx, { unsafe fn unbox_arg_unchecked(arg: callconv::Arg<'_, 'fcx>) -> Self { let index = arg.index(); unsafe { arg.unbox_arg_using_from_datum().unwrap_or_else(|| panic!("argument {index} must not be null")) } } unsafe fn unbox_nullable_arg(arg: callconv::Arg<'_, 'fcx>) -> nullable::Nullable { unsafe { arg.unbox_arg_using_from_datum().into() } } } impl ::std::ops::Deref for $name { type Target=[<$name Data>]; fn deref(&self) -> &Self::Target { &self.0 } } impl ::std::ops::DerefMut for $name { fn deref_mut(&mut self) -> &mut Self::Target { self.1 = $crate::type_builder::CachedDatum::None; &mut self.0 } } impl From<[<$name Data>]> for $name { fn from(inner: [<$name Data>]) -> Self { Self(inner, $crate::type_builder::CachedDatum::None) } } impl From<[<$name Data>]> for Option<$name> { fn from(inner: [<$name Data>]) -> Self { Some($name(inner, $crate::type_builder::CachedDatum::None)) } } } } } // Routes to ron_inout_funcs_impl! for lifetime types_impl! for non-lifetime types #[macro_export] macro_rules! ron_inout_funcs { // Pattern 1: Explicit lifetime parameter → route to ron_inout_funcs_impl! ($name:ident<$lifetime:lifetime>) => { $crate::ron_inout_funcs_impl!($name); }; // Pattern 2: No lifetime parameter → route to ron_inout_funcs_no_lifetime_impl! ($name:ident) => { $crate::ron_inout_funcs_no_lifetime_impl!($name); }; } // Implementation macro for types without lifetimes (used internally by unified ron_inout_funcs!) #[macro_export] macro_rules! 
ron_inout_funcs_no_lifetime_impl { ($name:ident) => { impl InOutFuncs for $name { fn output(&self, buffer: &mut StringInfo) { use $crate::serialization::{str_to_db_encoding, EncodedStr::*}; let stringified = ron::to_string(&**self).unwrap(); match str_to_db_encoding(&stringified) { Utf8(s) => buffer.push_str(s), Other(s) => buffer.push_bytes(s.to_bytes()), } } fn input(input: &std::ffi::CStr) -> $name where Self: Sized, { use $crate::serialization::str_from_db_encoding; let input = str_from_db_encoding(input); let val = ron::from_str(input).unwrap(); unsafe { Self(val, $crate::type_builder::CachedDatum::None).flatten() } } } }; } // Implementation macro for lifetime-parameterized types (used internally by unified ron_inout_funcs!) #[macro_export] macro_rules! ron_inout_funcs_impl { ($name:ident) => { impl<'input> InOutFuncs for $name<'input> { fn output(&self, buffer: &mut StringInfo) { use $crate::serialization::{str_to_db_encoding, EncodedStr::*}; let stringified = ron::to_string(&**self).unwrap(); match str_to_db_encoding(&stringified) { Utf8(s) => buffer.push_str(s), Other(s) => buffer.push_bytes(s.to_bytes()), } } fn input(input: &std::ffi::CStr) -> $name<'input> where Self: Sized, { use $crate::serialization::str_from_db_encoding; let input = str_from_db_encoding(input); let val = ron::from_str(input).unwrap(); unsafe { Self(val, $crate::type_builder::CachedDatum::None).flatten() } } } }; } #[macro_export] macro_rules! flatten { ($typ:ident { $($field:ident$(: $value:expr)?),* $(,)? }) => { { let data = ::paste::paste! { [<$typ Data>] { header: 0, version: 1, padding: [0; 3], $( $field$(: $value)? ),* } }; data.flatten() } } } #[macro_export] macro_rules! build { ($typ:ident { $($field:ident$(: $value:expr)?),* $(,)? }) => { { <$typ>::from(::paste::paste! { [<$typ Data>] { header: 0, version: 1, padding: [0; 3], $( $field$(: $value)? ),* } }) } } } #[repr(u8)] pub enum SerializationType { Default = 1, } #[macro_export] macro_rules! 
do_serialize { ($state: ident) => { { $crate::do_serialize!($state, version: 1) } }; ($state: ident, version: $version: expr) => { { use $crate::type_builder::SerializationType; use std::io::{Cursor, Write}; use std::convert::TryInto; let state = &*$state; let serialized_size = bincode::serialized_size(state) .unwrap_or_else(|e| pgrx::error!("serialization error {}", e)); let our_size = serialized_size + 2; // size of serialized data + our version flags let allocated_size = our_size + 4; // size of our data + the varlena header let allocated_size = allocated_size.try_into() .unwrap_or_else(|e| pgrx::error!("serialization error {}", e)); // valena types have a maximum size if allocated_size > 0x3FFFFFFF { pgrx::error!("size {} bytes is to large", allocated_size) } let bytes: &mut [u8] = unsafe { let bytes = pgrx::pg_sys::palloc0(allocated_size); std::slice::from_raw_parts_mut(bytes.cast(), allocated_size) }; let mut writer = Cursor::new(bytes); // varlena header space let varsize = [0; 4]; writer.write_all(&varsize) .unwrap_or_else(|e| pgrx::error!("serialization error {}", e)); // type version writer.write_all(&[$version]) .unwrap_or_else(|e| pgrx::error!("serialization error {}", e)); // serialization version; 1 for bincode is currently the only option writer.write_all(&[SerializationType::Default as u8]) .unwrap_or_else(|e| pgrx::error!("serialization error {}", e)); bincode::serialize_into(&mut writer, state) .unwrap_or_else(|e| pgrx::error!("serialization error {}", e)); unsafe { let len = writer.position().try_into().expect("serialized size too large"); ::pgrx::set_varsize_4b(writer.get_mut().as_mut_ptr() as *mut _, len); } $crate::raw::bytea::from(pg_sys::Datum::from(writer.into_inner().as_mut_ptr())) } }; } #[macro_export] macro_rules! 
do_deserialize { ($bytes: expr, $t: ty) => {{ use $crate::type_builder::SerializationType; let input: $crate::raw::bytea = $bytes; let state: $t = unsafe { let input: pgrx::pg_sys::Datum = input.into(); let detoasted = pg_sys::pg_detoast_datum_packed(input.cast_mut_ptr()); let len = pgrx::varsize_any_exhdr(detoasted); let data = pgrx::vardata_any(detoasted); let bytes = std::slice::from_raw_parts(data as *mut u8, len); if bytes.len() < 1 { pgrx::error!("deserialization error, no bytes") } if bytes[0] != 1 { pgrx::error!( "deserialization error, invalid serialization version {}", bytes[0] ) } if bytes[1] != SerializationType::Default as u8 { pgrx::error!( "deserialization error, invalid serialization type {}", bytes[1] ) } bincode::deserialize(&bytes[2..]) .unwrap_or_else(|e| pgrx::error!("deserialization error {}", e)) }; state.into() }}; } ================================================ FILE: extension/src/uddsketch.rs ================================================ use pgrx::*; use encodings::{delta, prefix_varint}; use uddsketch::{SketchHashKey, UDDSketch as UddSketchInternal, UDDSketchMetadata}; use crate::{ accessors::{ AccessorApproxPercentile, AccessorApproxPercentileRank, AccessorError, AccessorMean, AccessorNumVals, AccessorPercentileArray, }, aggregate_utils::in_aggregate_context, flatten, palloc::{Inner, Internal, InternalAsValue, ToInternal}, pg_type, }; // PG function for adding values to a sketch. // Null values are ignored. 
#[pg_extern(immutable, parallel_safe)] pub fn uddsketch_trans( state: Internal, size: i32, max_error: f64, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { uddsketch_trans_inner(unsafe { state.to_inner() }, size, max_error, value, fcinfo).internal() } pub fn uddsketch_trans_inner( state: Option>, size: i32, max_error: f64, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { let max_size = u32::try_from(size).unwrap_or(PERCENTILE_AGG_DEFAULT_SIZE); unsafe { in_aggregate_context(fcinfo, || { let value = match value { None => return state, Some(value) => value, }; let mut state = match state { None => UddSketchInternal::new(max_size, max_error).into(), Some(state) => state, }; state.add_value(value); Some(state) }) } } const PERCENTILE_AGG_DEFAULT_SIZE: u32 = 200; const PERCENTILE_AGG_DEFAULT_ERROR: f64 = 0.001; // transition function for the simpler percentile_agg aggregate, which doesn't // take parameters for the size and error, but uses a default #[pg_extern(immutable, parallel_safe)] pub fn percentile_agg_trans( state: Internal, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { percentile_agg_trans_inner(unsafe { state.to_inner() }, value, fcinfo).internal() } pub fn percentile_agg_trans_inner( state: Option>, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { let default_size = PERCENTILE_AGG_DEFAULT_SIZE; let default_max_error = PERCENTILE_AGG_DEFAULT_ERROR; uddsketch_trans_inner(state, default_size as _, default_max_error, value, fcinfo) } // PG function for merging sketches. 
#[pg_extern(immutable, parallel_safe)] pub fn uddsketch_combine( state1: Internal, state2: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { unsafe { uddsketch_combine_inner(state1.to_inner(), state2.to_inner(), fcinfo).internal() } } pub fn uddsketch_combine_inner( state1: Option>, state2: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || match (state1, state2) { (None, None) => None, (None, Some(state2)) => Some(state2.clone().into()), (Some(state1), None) => Some(state1.clone().into()), (Some(state1), Some(state2)) => { let mut sketch = state1.clone(); sketch.merge_sketch(&state2); Some(sketch.into()) } }) } } use crate::raw::bytea; #[pg_extern(immutable, parallel_safe, strict)] pub fn uddsketch_serialize(state: Internal) -> bytea { let serializable = &SerializedUddSketch::from(unsafe { state.get().unwrap() }); crate::do_serialize!(serializable) } #[pg_extern(strict, immutable, parallel_safe)] pub fn uddsketch_deserialize(bytes: bytea, _internal: Internal) -> Option { uddsketch_deserialize_inner(bytes).internal() } pub fn uddsketch_deserialize_inner(bytes: bytea) -> Inner { let sketch: UddSketchInternal = crate::do_deserialize!(bytes, SerializedUddSketch); sketch.into() } #[derive(serde::Serialize, serde::Deserialize)] struct SerializedUddSketch { alpha: f64, max_buckets: u32, num_buckets: u32, compactions: u32, count: u64, sum: f64, buckets: CompressedBuckets, } impl From<&UddSketchInternal> for SerializedUddSketch { fn from(sketch: &UddSketchInternal) -> Self { let buckets = compress_buckets(sketch.bucket_iter()); SerializedUddSketch { alpha: sketch.max_error(), max_buckets: sketch.max_allowed_buckets(), num_buckets: sketch.current_buckets_count() as u32, compactions: u32::from(sketch.times_compacted()), count: sketch.count(), sum: sketch.sum(), buckets, } } } impl From for UddSketchInternal { fn from(sketch: SerializedUddSketch) -> Self { UddSketchInternal::new_from_data( &UDDSketchMetadata { 
max_buckets: sketch.max_buckets, current_error: sketch.alpha, compactions: u8::try_from(sketch.compactions) .expect("compactions cannot be higher than 65"), values: sketch.count, sum: sketch.sum, buckets: sketch.num_buckets, }, sketch.keys(), sketch.counts(), ) } } impl SerializedUddSketch { fn keys(&self) -> impl Iterator + '_ { decompress_keys( &self.buckets.negative_indexes, self.buckets.zero_bucket_count != 0, &self.buckets.positive_indexes, ) } fn counts(&self) -> impl Iterator + '_ { decompress_counts( &self.buckets.negative_counts, self.buckets.zero_bucket_count, &self.buckets.positive_counts, ) } } // PG object for the sketch. pg_type! { #[derive(Debug)] struct UddSketch<'input> { alpha: f64, max_buckets: u32, num_buckets: u32, compactions: u64, count: u64, sum: f64, zero_bucket_count: u64, neg_indexes_bytes: u32, neg_buckets_bytes: u32, pos_indexes_bytes: u32, pos_buckets_bytes: u32, negative_indexes: [u8; self.neg_indexes_bytes], negative_counts: [u8; self.neg_buckets_bytes], positive_indexes: [u8; self.pos_indexes_bytes], positive_counts: [u8; self.pos_buckets_bytes], } } #[derive(serde::Serialize, serde::Deserialize)] struct ReadableUddSketch { version: u8, alpha: f64, max_buckets: u32, num_buckets: u32, compactions: u64, count: u64, sum: f64, buckets: Vec<(SketchHashKey, u64)>, } impl From<&UddSketch<'_>> for ReadableUddSketch { fn from(sketch: &UddSketch<'_>) -> Self { ReadableUddSketch { version: sketch.version, alpha: sketch.alpha, max_buckets: sketch.max_buckets, num_buckets: sketch.num_buckets, compactions: sketch.compactions, count: sketch.count, sum: sketch.sum, buckets: sketch.keys().zip(sketch.counts()).collect(), } } } impl<'a, 'b> From<&'a ReadableUddSketch> for UddSketch<'b> { fn from(sketch: &'a ReadableUddSketch) -> Self { assert_eq!(sketch.version, 1); let CompressedBuckets { negative_indexes, negative_counts, zero_bucket_count, positive_indexes, positive_counts, } = compress_buckets(sketch.buckets.iter().cloned()); unsafe { flatten! 
{ UddSketch { alpha: sketch.alpha, max_buckets: sketch.max_buckets, num_buckets: sketch.num_buckets, compactions: sketch.compactions, count: sketch.count, sum: sketch.sum, zero_bucket_count, neg_indexes_bytes: (negative_indexes.len() as u32), neg_buckets_bytes: (negative_counts.len() as u32), pos_indexes_bytes: (positive_indexes.len() as u32), pos_buckets_bytes: (positive_counts.len() as u32), negative_indexes: (&*negative_indexes).into(), negative_counts: (&*negative_counts).into(), positive_indexes: (&*positive_indexes).into(), positive_counts: (&*positive_counts).into(), } } } } } impl<'input> InOutFuncs for UddSketch<'input> { fn output(&self, buffer: &mut StringInfo) { use crate::serialization::{str_to_db_encoding, EncodedStr::*}; let stringified = ron::to_string(&ReadableUddSketch::from(self)).unwrap(); match str_to_db_encoding(&stringified) { Utf8(s) => buffer.push_str(s), Other(s) => buffer.push_bytes(s.to_bytes()), } } fn input(input: &std::ffi::CStr) -> Self where Self: Sized, { use crate::serialization::str_from_db_encoding; let utf8_str = str_from_db_encoding(input); let val: ReadableUddSketch = ron::from_str(utf8_str).unwrap(); UddSketch::from(&val) } } impl<'input> UddSketch<'input> { fn keys(&self) -> impl Iterator + '_ { // FIXME does this really need a slice? decompress_keys( self.negative_indexes.as_slice(), self.zero_bucket_count != 0, self.positive_indexes.as_slice(), ) } fn counts(&self) -> impl Iterator + '_ { // FIXME does this really need a slice? 
decompress_counts( self.negative_counts.as_slice(), self.zero_bucket_count, self.positive_counts.as_slice(), ) } fn metadata(&self) -> UDDSketchMetadata { UDDSketchMetadata { max_buckets: self.max_buckets, current_error: self.alpha, compactions: u8::try_from(self.compactions) .expect("compactions cannot be higher than 65"), values: self.count, sum: self.sum, buckets: self.num_buckets, } } fn to_uddsketch(&self) -> UddSketchInternal { UddSketchInternal::new_from_data(&self.metadata(), self.keys(), self.counts()) } fn from_internal(state: &UddSketchInternal) -> Self { let CompressedBuckets { negative_indexes, negative_counts, zero_bucket_count, positive_indexes, positive_counts, } = compress_buckets(state.bucket_iter()); // we need to flatten the vector to a single buffer that contains // both the size, the data, and the varlen header unsafe { flatten!(UddSketch { alpha: state.max_error(), max_buckets: state.max_allowed_buckets(), num_buckets: state.current_buckets_count() as u32, compactions: state.times_compacted() as u64, count: state.count(), sum: state.sum(), zero_bucket_count, neg_indexes_bytes: negative_indexes.len() as u32, neg_buckets_bytes: negative_counts.len() as u32, pos_indexes_bytes: positive_indexes.len() as u32, pos_buckets_bytes: positive_counts.len() as u32, negative_indexes: negative_indexes.into(), negative_counts: negative_counts.into(), positive_indexes: positive_indexes.into(), positive_counts: positive_counts.into(), }) } } } impl<'input> FromIterator for UddSketch<'input> { fn from_iter>(iter: T) -> Self { let mut sketch = UddSketchInternal::new( PERCENTILE_AGG_DEFAULT_SIZE.into(), PERCENTILE_AGG_DEFAULT_ERROR, ); for value in iter { sketch.add_value(value); } Self::from_internal(&sketch) } } // PG function to generate a user-facing UddSketch object from a UddSketchInternal. 
#[pg_extern(immutable, parallel_safe)] fn uddsketch_final( state: Internal, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { uddsketch_final_inner(state.to_inner(), fcinfo) } } fn uddsketch_final_inner( state: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let state = match state { None => return None, Some(state) => state, }; UddSketch::from_internal(&state).into() }) } } #[derive(serde::Serialize, serde::Deserialize)] struct CompressedBuckets { negative_indexes: Vec, negative_counts: Vec, zero_bucket_count: u64, positive_indexes: Vec, positive_counts: Vec, } fn compress_buckets(buckets: impl Iterator) -> CompressedBuckets { let mut negative_indexes = prefix_varint::I64Compressor::with(delta::i64_encoder()); let mut negative_counts = prefix_varint::U64Compressor::with(delta::u64_encoder()); let mut zero_bucket_count = 0; let mut positive_indexes = prefix_varint::I64Compressor::with(delta::i64_encoder()); let mut positive_counts = prefix_varint::U64Compressor::with(delta::u64_encoder()); for (k, b) in buckets { match k { SketchHashKey::Negative(i) => { negative_indexes.push(i); negative_counts.push(b); } SketchHashKey::Zero => zero_bucket_count = b, SketchHashKey::Positive(i) => { positive_indexes.push(i); positive_counts.push(b); } SketchHashKey::Invalid => unreachable!(), } } let negative_indexes = negative_indexes.finish(); let negative_counts = negative_counts.finish(); let positive_indexes = positive_indexes.finish(); let positive_counts = positive_counts.finish(); CompressedBuckets { negative_indexes, negative_counts, zero_bucket_count, positive_indexes, positive_counts, } } fn decompress_keys<'i>( negative_indexes: &'i [u8], zero_bucket: bool, positive_indexes: &'i [u8], ) -> impl Iterator + 'i { let negatives = prefix_varint::i64_decompressor(negative_indexes) .map(delta::i64_decoder()) .map(SketchHashKey::Negative); let zero = zero_bucket.then(|| uddsketch::SketchHashKey::Zero); let positives = 
prefix_varint::i64_decompressor(positive_indexes) .map(delta::i64_decoder()) .map(SketchHashKey::Positive); negatives.chain(zero).chain(positives) } fn decompress_counts<'b>( negative_buckets: &'b [u8], zero_bucket: u64, positive_buckets: &'b [u8], ) -> impl Iterator + 'b { let negatives = prefix_varint::u64_decompressor(negative_buckets).map(delta::u64_decoder()); let zero = (zero_bucket != 0).then(|| zero_bucket); let positives = prefix_varint::u64_decompressor(positive_buckets).map(delta::u64_decoder()); negatives.chain(zero).chain(positives) } extension_sql!( "\n\ CREATE AGGREGATE uddsketch(\n\ size integer, max_error DOUBLE PRECISION, value DOUBLE PRECISION\n\ ) (\n\ sfunc = uddsketch_trans,\n\ stype = internal,\n\ finalfunc = uddsketch_final,\n\ combinefunc = uddsketch_combine,\n\ serialfunc = uddsketch_serialize,\n\ deserialfunc = uddsketch_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "udd_agg", requires = [ uddsketch_trans, uddsketch_final, uddsketch_combine, uddsketch_serialize, uddsketch_deserialize ], ); extension_sql!( "\n\ CREATE AGGREGATE percentile_agg(value DOUBLE PRECISION)\n\ (\n\ sfunc = percentile_agg_trans,\n\ stype = internal,\n\ finalfunc = uddsketch_final,\n\ combinefunc = uddsketch_combine,\n\ serialfunc = uddsketch_serialize,\n\ deserialfunc = uddsketch_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "percentile_agg", requires = [ percentile_agg_trans, uddsketch_final, uddsketch_combine, uddsketch_serialize, uddsketch_deserialize ], ); #[pg_extern(immutable, parallel_safe)] pub fn uddsketch_compound_trans<'a>( state: Internal, value: Option>, fcinfo: pg_sys::FunctionCallInfo, ) -> Option { unsafe { uddsketch_compound_trans_inner(state.to_inner(), value, fcinfo).internal() } } pub fn uddsketch_compound_trans_inner( state: Option>, value: Option, fcinfo: pg_sys::FunctionCallInfo, ) -> Option> { unsafe { in_aggregate_context(fcinfo, || { let Some(value) = value else { return state }; let Some(mut state) = state else { return 
Some(value.to_uddsketch().into()); }; state.merge_items(&value.metadata(), value.keys(), value.counts()); state.into() }) } } extension_sql!( "\n\ CREATE AGGREGATE rollup(\n\ sketch uddsketch\n\ ) (\n\ sfunc = uddsketch_compound_trans,\n\ stype = internal,\n\ finalfunc = uddsketch_final,\n\ combinefunc = uddsketch_combine,\n\ serialfunc = uddsketch_serialize,\n\ deserialfunc = uddsketch_deserialize,\n\ parallel = safe\n\ );\n\ ", name = "udd_rollup", requires = [ uddsketch_compound_trans, uddsketch_final, uddsketch_combine, uddsketch_serialize, uddsketch_deserialize ], ); //---- Available PG operations on the sketch #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_uddsketch_approx_percentile<'a>( sketch: UddSketch<'a>, accessor: AccessorApproxPercentile, ) -> f64 { uddsketch_approx_percentile(accessor.percentile, sketch) } // Approximate the value at the given approx_percentile (0.0-1.0) #[pg_extern(immutable, parallel_safe, name = "approx_percentile")] pub fn uddsketch_approx_percentile<'a>(percentile: f64, sketch: UddSketch<'a>) -> f64 { uddsketch::estimate_quantile( percentile, sketch.alpha, uddsketch::gamma(sketch.alpha), sketch.count, sketch.keys().zip(sketch.counts()), ) } #[pg_operator(immutable)] #[opname(->)] pub fn arrow_uddsketch_approx_percentile_array<'a>( sketch: UddSketch<'a>, percentiles: AccessorPercentileArray, ) -> Vec { approx_percentile_slice(&percentiles.percentile[..percentiles.len as usize], sketch) } // Approximate the value at the given approx_percentile (0.0-1.0) for each entry in an array #[pg_extern(immutable, name = "approx_percentile_array")] pub fn uddsketch_approx_percentile_array<'a>( percentiles: Vec, sketch: UddSketch<'a>, ) -> Vec { approx_percentile_slice(&percentiles, sketch) } fn approx_percentile_slice<'a, 'b>( percentiles: impl IntoIterator, sketch: UddSketch<'a>, ) -> Vec { let mut results = Vec::new(); for percentile in percentiles { results.push(uddsketch::estimate_quantile( *percentile, sketch.alpha, 
uddsketch::gamma(sketch.alpha), sketch.count, sketch.keys().zip(sketch.counts()), )) } results } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_uddsketch_approx_rank<'a>( sketch: UddSketch<'a>, accessor: AccessorApproxPercentileRank, ) -> f64 { uddsketch_approx_percentile_rank(accessor.value, sketch) } // Approximate the approx_percentile at the given value #[pg_extern(immutable, parallel_safe, name = "approx_percentile_rank")] pub fn uddsketch_approx_percentile_rank<'a>(value: f64, sketch: UddSketch<'a>) -> f64 { uddsketch::estimate_quantile_at_value( value, uddsketch::gamma(sketch.alpha), sketch.count, sketch.keys().zip(sketch.counts()), ) } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_uddsketch_num_vals<'a>(sketch: UddSketch<'a>, _accessor: AccessorNumVals) -> f64 { uddsketch_num_vals(sketch) } // Number of elements from which the sketch was built. #[pg_extern(immutable, parallel_safe, name = "num_vals")] pub fn uddsketch_num_vals<'a>(sketch: UddSketch<'a>) -> f64 { sketch.count as f64 } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_uddsketch_mean<'a>(sketch: UddSketch<'a>, _accessor: AccessorMean) -> f64 { uddsketch_mean(sketch) } // Average of all the values entered in the sketch. // Note that this is not an approximation, though there may be loss of precision. #[pg_extern(immutable, parallel_safe, name = "mean")] pub fn uddsketch_mean<'a>(sketch: UddSketch<'a>) -> f64 { if sketch.count > 0 { sketch.sum / sketch.count as f64 } else { 0.0 } } // Total sum of all the values entered in the sketch. #[pg_extern(immutable, parallel_safe, name = "total")] pub fn uddsketch_sum(sketch: UddSketch<'_>) -> f64 { sketch.sum } #[pg_operator(immutable, parallel_safe)] #[opname(->)] pub fn arrow_uddsketch_error<'a>(sketch: UddSketch<'a>, _accessor: AccessorError) -> f64 { uddsketch_error(sketch) } // The maximum error (relative to the true value) for any approx_percentile estimate. 
#[pg_extern(immutable, parallel_safe, name = "error")] pub fn uddsketch_error<'a>(sketch: UddSketch<'a>) -> f64 { sketch.alpha } #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use super::*; use pgrx_macros::pg_test; // Assert equality between two floats, within some fixed error range. fn apx_eql(value: f64, expected: f64, error: f64) { assert!( (value - expected).abs() < error, "Float value {value} differs from expected {expected} by more than {error}" ); } // Assert equality between two floats, within an error expressed as a fraction of the expected value. fn pct_eql(value: f64, expected: f64, pct_error: f64) { apx_eql(value, expected, pct_error * expected); } #[pg_test] fn test_aggregate() { Spi::connect_mut(|client| { client .update("CREATE TABLE test (data DOUBLE PRECISION)", None, &[]) .unwrap(); client .update( "INSERT INTO test SELECT generate_series(0.01, 100, 0.01)", None, &[], ) .unwrap(); let sanity = client .update("SELECT COUNT(*) FROM test", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(Some(10000), sanity); client .update( "CREATE VIEW sketch AS \ SELECT uddsketch(100, 0.05, data) \ FROM test", None, &[], ) .unwrap(); let sanity = client .update("SELECT COUNT(*) FROM sketch", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert!(sanity.unwrap_or(0) > 0); let (mean, count) = client .update( "SELECT \ mean(uddsketch), \ num_vals(uddsketch) \ FROM sketch", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); apx_eql(mean.unwrap(), 50.005, 0.0001); apx_eql(count.unwrap(), 10000.0, 0.000001); let (mean2, count2) = client .update( "SELECT \ uddsketch -> mean(), \ uddsketch -> num_vals() \ FROM sketch", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); assert_eq!(mean, mean2); assert_eq!(count, count2); let (error, error2) = client .update( "SELECT \ error(uddsketch), \ uddsketch -> error() \ FROM sketch", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); apx_eql(error.unwrap(), 0.05, 0.0001); 
assert_eq!(error, error2); for i in 0..=100 { let value = i as f64; let approx_percentile = value / 100.0; let (est_val, est_quant) = client .update( &format!( "SELECT \ approx_percentile({approx_percentile}, uddsketch), \ approx_percentile_rank({value}, uddsketch) \ FROM sketch" ), None, &[], ) .unwrap() .first() .get_two::() .unwrap(); if i == 0 { pct_eql(est_val.unwrap(), 0.01, 1.0); apx_eql(est_quant.unwrap(), approx_percentile, 0.0001); } else { pct_eql(est_val.unwrap(), value, 1.0); pct_eql(est_quant.unwrap(), approx_percentile, 1.0); } let (est_val2, est_quant2) = client .update( &format!( "SELECT \ uddsketch->approx_percentile({approx_percentile}), \ uddsketch->approx_percentile_rank({value}) \ FROM sketch" ), None, &[], ) .unwrap() .first() .get_two::() .unwrap(); assert_eq!(est_val, est_val2); assert_eq!(est_quant, est_quant2); } }); } #[pg_test] fn test_compound_agg() { Spi::connect_mut(|client| { client .update( "CREATE TABLE new_test (device INTEGER, value DOUBLE PRECISION)", None, &[], ) .unwrap(); client.update("INSERT INTO new_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, &[]).unwrap(); let sanity = client .update("SELECT COUNT(*) FROM new_test", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(Some(1010), sanity); client .update( "CREATE VIEW sketches AS \ SELECT device, uddsketch(20, 0.01, value) \ FROM new_test \ GROUP BY device", None, &[], ) .unwrap(); client .update( "CREATE VIEW composite AS \ SELECT rollup(uddsketch) as uddsketch \ FROM sketches", None, &[], ) .unwrap(); client .update( "CREATE VIEW base AS \ SELECT uddsketch(20, 0.01, value) \ FROM new_test", None, &[], ) .unwrap(); let (value, error) = client .update( "SELECT \ approx_percentile(0.9, uddsketch), \ error(uddsketch) \ FROM base", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); let (test_value, test_error) = client .update( "SELECT \ approx_percentile(0.9, uddsketch), \ error(uddsketch) \ FROM 
composite", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); apx_eql(test_value.unwrap(), value.unwrap(), 0.0001); apx_eql(test_error.unwrap(), error.unwrap(), 0.000001); pct_eql(test_value.unwrap(), 9.0, test_error.unwrap()); }); } #[pg_test] fn test_percentile_agg() { Spi::connect_mut(|client| { client .update( "CREATE TABLE pa_test (device INTEGER, value DOUBLE PRECISION)", None, &[], ) .unwrap(); client.update("INSERT INTO pa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, &[]).unwrap(); let sanity = client .update("SELECT COUNT(*) FROM pa_test", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(Some(1010), sanity); // use the default values for percentile_agg client .update( "CREATE VIEW uddsketch_test AS \ SELECT uddsketch(200, 0.001, value) as approx \ FROM pa_test ", None, &[], ) .unwrap(); client .update( "CREATE VIEW percentile_agg AS \ SELECT percentile_agg(value) as approx \ FROM pa_test", None, &[], ) .unwrap(); let (value, error) = client .update( "SELECT \ approx_percentile(0.9, approx), \ error(approx) \ FROM uddsketch_test", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); let (test_value, test_error) = client .update( "SELECT \ approx_percentile(0.9, approx), \ error(approx) \ FROM percentile_agg", None, &[], ) .unwrap() .first() .get_two::() .unwrap(); apx_eql(test_value.unwrap(), value.unwrap(), 0.0001); apx_eql(test_error.unwrap(), error.unwrap(), 0.000001); pct_eql(test_value.unwrap(), 9.0, test_error.unwrap()); }); } #[pg_test] fn test_approx_percentile_array() { Spi::connect_mut(|client| { client .update( "CREATE TABLE paa_test (device INTEGER, value DOUBLE PRECISION)", None, &[], ) .unwrap(); client.update("INSERT INTO paa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, &[]).unwrap(); let sanity = client .update("SELECT COUNT(*) FROM paa_test", None, &[]) .unwrap() .first() .get_one::() .unwrap(); 
assert_eq!(Some(1010), sanity); client .update( "CREATE VIEW uddsketch_test AS \ SELECT uddsketch(200, 0.001, value) as approx \ FROM paa_test ", None, &[], ) .unwrap(); client .update( "CREATE VIEW percentile_agg AS \ SELECT percentile_agg(value) as approx \ FROM paa_test", None, &[], ) .unwrap(); let (value, error) = client .update( "SELECT \ approx_percentile_array(array[0.9,0.5,0.2], approx), \ error(approx) \ FROM uddsketch_test", None, &[], ) .unwrap() .first() .get_two::, f64>() .unwrap(); let (test_value, test_error) = client .update( "SELECT \ approx_percentile_array(array[0.9,0.5,0.2], approx), \ error(approx) \ FROM percentile_agg", None, &[], ) .unwrap() .first() .get_two::, f64>() .unwrap(); assert!( test_value .as_ref() .unwrap() .iter() .zip(value.unwrap()) .all(|(a, b)| { (a - b).abs() < 0.0001 }), "Some Float value differs from expected by more than {}", 0.0001 ); apx_eql(test_error.unwrap(), error.unwrap(), 0.000001); assert!(test_value .unwrap() .iter() .zip(vec![9.0, 5.0, 2.0]) .all(|(a, b)| { matches!(pct_eql(*a, b, test_error.unwrap()), ()) })); }); } #[pg_test] fn test_approx_percentile_array_arrow() { Spi::connect_mut(|client| { client .update( "CREATE TABLE paa_test (device INTEGER, value DOUBLE PRECISION)", None, &[], ) .unwrap(); client.update("INSERT INTO paa_test SELECT dev, dev - v FROM generate_series(1,10) dev, generate_series(0, 1.0, 0.01) v", None, &[]).unwrap(); let sanity = client .update("SELECT COUNT(*) FROM paa_test", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(Some(1010), sanity); client .update( "CREATE VIEW uddsketch_test AS \ SELECT uddsketch(200, 0.001, value) as approx \ FROM paa_test ", None, &[], ) .unwrap(); client .update( "CREATE VIEW percentile_agg AS \ SELECT percentile_agg(value) as approx \ FROM paa_test", None, &[], ) .unwrap(); let (value, error) = client .update( "SELECT \ approx_percentile_array(array[0.9,0.5,0.2], approx), \ error(approx) \ FROM uddsketch_test", None, &[], ) .unwrap() 
.first() .get_two::, f64>() .unwrap(); let (test_value_arrow, test_error_arrow) = client .update( "SELECT approx->approx_percentiles(array[0.9,0.5,0.2]), \ error(approx) \ FROM uddsketch_test", None, &[], ) .unwrap() .first() .get_two::, f64>() .unwrap(); assert!( test_value_arrow .as_ref() .unwrap() .iter() .zip(value.as_ref().unwrap()) .all(|(a, b)| { (a - b).abs() < 0.0001 }), "Some Float value differs from expected by more than {}", 0.0001 ); apx_eql(test_error_arrow.unwrap(), error.unwrap(), 0.000001); assert!(test_value_arrow .unwrap() .iter() .zip(vec![9.0, 5.0, 2.0]) .all(|(a, b)| { matches!(pct_eql(*a, b, test_error_arrow.unwrap()), ()) })); }); } #[pg_test] fn uddsketch_io_test() { Spi::connect_mut(|client| { client .update("CREATE TABLE io_test (value DOUBLE PRECISION)", None, &[]) .unwrap(); client.update("INSERT INTO io_test VALUES (-1000), (-100), (-10), (-1), (-0.1), (-0.01), (-0.001), (0), (0.001), (0.01), (0.1), (1), (10), (100), (1000)", None, &[]).unwrap(); let sketch = client .update( "SELECT uddsketch(10, 0.01, value)::text FROM io_test", None, &[], ) .unwrap() .first() .get_one::() .unwrap(); let expected = "(\ version:1,\ alpha:0.9881209712069546,\ max_buckets:10,\ num_buckets:9,\ compactions:8,\ count:15,\ sum:0,\ buckets:[\ (Negative(2),1),\ (Negative(1),2),\ (Negative(0),3),\ (Negative(-1),1),\ (Zero,1),\ (Positive(-1),1),\ (Positive(0),3),\ (Positive(1),2),\ (Positive(2),1)\ ]\ )"; assert_eq!(sketch, Some(expected.into())); client .update( "CREATE VIEW sketch AS SELECT uddsketch(10, 0.01, value) FROM io_test", None, &[], ) .unwrap(); for cmd in [ "mean(", "num_vals(", "error(", "approx_percentile(0.1,", "approx_percentile(0.25,", "approx_percentile(0.5,", "approx_percentile(0.6,", "approx_percentile(0.8,", ] { let sql1 = format!("SELECT {cmd}uddsketch) FROM sketch"); let sql2 = format!("SELECT {cmd}'{expected}'::uddsketch) FROM sketch"); let expected = client .update(&sql1, None, &[]) .unwrap() .first() .get_one::() .unwrap() .unwrap(); 
let test = client .update(&sql2, None, &[]) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert!((expected - test).abs() < f64::EPSILON); } }); } #[pg_test] fn uddsketch_byte_io_test() { unsafe { use std::ptr; let state = uddsketch_trans_inner(None, 100, 0.005, Some(14.0), ptr::null_mut()); let state = uddsketch_trans_inner(state, 100, 0.005, Some(18.0), ptr::null_mut()); let state = uddsketch_trans_inner(state, 100, 0.005, Some(22.7), ptr::null_mut()); let state = uddsketch_trans_inner(state, 100, 0.005, Some(39.42), ptr::null_mut()); let state = uddsketch_trans_inner(state, 100, 0.005, Some(-43.0), ptr::null_mut()); let control = state.unwrap(); let buffer = uddsketch_serialize(Inner::from(control.clone()).internal().unwrap()); let buffer = pgrx::varlena::varlena_to_byte_slice(buffer.0.cast_mut_ptr()); let expected = [ 1, 1, 123, 20, 174, 71, 225, 122, 116, 63, 100, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 144, 194, 245, 40, 92, 143, 73, 64, 2, 0, 0, 0, 0, 0, 0, 0, 202, 11, 1, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 66, 8, 105, 93, 221, 4, 0, 0, 0, 0, 0, 0, 0, 5, 1, 1, 1, ]; assert_eq!(buffer, expected); let expected = pgrx::varlena::rust_byte_slice_to_bytea(&expected); let new_state = uddsketch_deserialize_inner(bytea(pg_sys::Datum::from(expected.as_ptr()))); assert_eq!(&*new_state, &*control); } } #[pg_test] fn test_udd_null_input_yields_null_output() { Spi::connect_mut(|client| { let output = client .update("SELECT uddsketch(20, 0.01, NULL)::TEXT", None, &[]) .unwrap() .first() .get_one::() .unwrap(); assert_eq!(output, None) }) } } ================================================ FILE: extension/src/utilities.rs ================================================ use crate::raw::TimestampTz; use pgrx::prelude::*; #[pg_extern( name = "generate_periodic_normal_series", schema = "toolkit_experimental" )] pub fn default_generate_periodic_normal_series( series_start: crate::raw::TimestampTz, rng_seed: Option, ) 
-> TableIterator<'static, (name!(time, TimestampTz), name!(value, f64))> { generate_periodic_normal_series(series_start, None, None, None, None, None, None, rng_seed) } #[allow(clippy::too_many_arguments)] pub fn alternate_generate_periodic_normal_series( series_start: crate::raw::TimestampTz, periods_per_series: i64, points_per_period: i64, seconds_between_points: i64, base_value: f64, periodic_magnitude: f64, standard_deviation: f64, rng_seed: Option, ) -> TableIterator<'static, (name!(time, TimestampTz), name!(value, f64))> { generate_periodic_normal_series( series_start, Some(periods_per_series * points_per_period * seconds_between_points * 1000000), Some(seconds_between_points * 1000000), Some(base_value), Some(points_per_period * seconds_between_points * 1000000), Some(periodic_magnitude), Some(standard_deviation), rng_seed, ) } #[allow(clippy::too_many_arguments)] #[pg_extern(schema = "toolkit_experimental")] pub fn generate_periodic_normal_series( series_start: crate::raw::TimestampTz, series_len: Option, //pg_sys::Interval, sample_interval: Option, //pg_sys::Interval, base_value: Option, period: Option, //pg_sys::Interval, periodic_magnitude: Option, standard_deviation: Option, rng_seed: Option, ) -> TableIterator<'static, (name!(time, TimestampTz), name!(value, f64))> { // Convenience consts to make defaults more readable const SECOND: i64 = 1000000; const MIN: i64 = 60 * SECOND; const HOUR: i64 = 60 * MIN; const DAY: i64 = 24 * HOUR; // TODO: exposing defaults in the PG function definition would be much nicer let series_len = series_len.unwrap_or(28 * DAY); let sample_interval = sample_interval.unwrap_or(10 * MIN); let base_value = base_value.unwrap_or(1000.0); let period = period.unwrap_or(DAY); let periodic_magnitude = periodic_magnitude.unwrap_or(100.0); let standard_deviation = standard_deviation.unwrap_or(100.0); use rand::SeedableRng; use rand_chacha::ChaCha12Rng; use rand_distr::Distribution; let mut rng = match rng_seed { Some(v) => 
ChaCha12Rng::seed_from_u64(v as u64), None => ChaCha12Rng::from_entropy(), }; let distribution = rand_distr::Normal::new(0.0, standard_deviation).unwrap(); let series_start: i64 = series_start.into(); TableIterator::new( (0..series_len) .step_by(sample_interval as usize) .map(move |accum| { let time = series_start + accum; let base = base_value + f64::sin(accum as f64 / (2.0 * std::f64::consts::PI * period as f64)) * periodic_magnitude; let error = distribution.sample(&mut rng); (time.into(), base + error) }), ) } // Returns days in month extension_sql!( " CREATE FUNCTION days_in_month(date timestamptz) RETURNS int SET search_path TO pg_catalog,pg_temp AS $$ SELECT CAST(EXTRACT('day' FROM (date_trunc('month', $1) + interval '1 month' - date_trunc('month', $1))) AS INTEGER) $$ LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE; ", name = "days_in_month", ); // Normalizes metric based on reference date and days extension_sql!( " CREATE FUNCTION month_normalize(metric float8, reference_date timestamptz, days float8 DEFAULT 365.25/12) RETURNS float8 SET search_path TO pg_catalog,pg_temp AS $$ SELECT metric * days / CAST(EXTRACT('day' FROM (reference_date + interval '1 month' - reference_date)) as INTEGER) $$ LANGUAGE SQL STRICT IMMUTABLE PARALLEL SAFE; ", name="month_normalize", ); // Convert a timestamp to a double precision unix epoch extension_sql!( " CREATE FUNCTION to_epoch(timestamptz) RETURNS DOUBLE PRECISION LANGUAGE SQL IMMUTABLE PARALLEL SAFE SET search_path TO pg_catalog,pg_temp AS $$ SELECT EXTRACT(EPOCH FROM $1); $$; ", name = "to_epoch", ); #[cfg(any(test, feature = "pg_test"))] #[pg_schema] mod tests { use pgrx::*; use pgrx_macros::pg_test; #[pg_test] fn test_to_epoch() { Spi::connect_mut(|client| { let test_val = client .update( "SELECT to_epoch('2021-01-01 00:00:00+03'::timestamptz)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert!((test_val - 1609448400f64).abs() < f64::EPSILON); let test_val = client .update("SELECT 
to_epoch('epoch'::timestamptz)", None, &[]) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert!((test_val - 0f64).abs() < f64::EPSILON); let test_val = client .update( "SELECT to_epoch('epoch'::timestamptz - interval '42 seconds')", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert!((test_val - -42f64).abs() < f64::EPSILON); }); } #[pg_test] fn test_days_in_month() { Spi::connect_mut(|client| { let test_val = client .update( "SELECT days_in_month('2021-01-01 00:00:00+03'::timestamptz)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(test_val, 31); }); Spi::connect_mut(|client| { let test_val = client .update( "SELECT days_in_month('2020-02-03 00:00:00+03'::timestamptz)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(test_val, 29); }); Spi::connect_mut(|client| { let test_val = client .update( "SELECT days_in_month('2023-01-31 00:00:00+00'::timestamptz)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(test_val, 31); }); } #[pg_test] fn test_monthly_normalize() { Spi::connect_mut(|client| { let test_val = client .update( "SELECT month_normalize(1000,'2021-01-01 00:00:00+03'::timestamptz)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(test_val, 981.8548387096774f64); }); Spi::connect_mut(|client| { let test_val = client .update( "SELECT month_normalize(1000,'2021-01-01 00:00:00+03'::timestamptz,30.5)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(test_val, 983.8709677419355f64); }); Spi::connect_mut(|client| { let test_val = client .update( "SELECT month_normalize(1000,'2021-01-01 00:00:00+03'::timestamptz,30)", None, &[], ) .unwrap() .first() .get_one::() .unwrap() .unwrap(); assert_eq!(test_val, 967.741935483871f64); }); } } ================================================ FILE: extension/timescaledb_toolkit.control ================================================ comment = 'Library 
of analytical hyperfunctions, time-series pipelining, and other SQL utilities' default_version = '@CARGO_VERSION@' relocatable = false superuser = false module_pathname = '$libdir/timescaledb_toolkit' # only for testing, will be removed for real installs # comma-separated list of previous versions this version can be upgraded from # directly. This is used to generate upgrade scripts. # upgradeable_from = '1.6.0, 1.7.0, 1.8.0, 1.10.0-dev, 1.10.1, 1.11.0, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 1.14.0, 1.15.0, 1.16.0, 1.17.0, 1.18.0, 1.19.0, 1.21.0' ================================================ FILE: tests/update/candlestick.md ================================================ # Candlestick Tests ## Get candlestick values from tick data ```sql,creation,min-toolkit-version=1.14.0 CREATE TABLE stocks_real_time(time TIMESTAMPTZ, symbol TEXT, price DOUBLE PRECISION,day_volume DOUBLE PRECISION); INSERT INTO stocks_real_time VALUES ('2023-01-11', 'AAPL', 133.445,10), ('2023-01-11', 'PFE', 47.38,2), ('2023-01-11', 'AMZN', 95.225,1), ('2023-01-11', 'INTC', 29.82,NULL), ('2023-01-11', 'MSFT', 235.5,100), ('2023-01-11', 'TSLA', 123.085,NULL), ('2023-01-11', 'AAPL', 133.44,20); CREATE MATERIALIZED VIEW cs AS SELECT symbol, candlestick_agg("time", price, day_volume) AS candlestick FROM stocks_real_time GROUP BY symbol; ``` ```sql,validation,min-toolkit-version=1.14.0 SELECT symbol, open(candlestick), high(candlestick), low(candlestick), close(candlestick), volume(candlestick) FROM cs ORDER BY symbol; ``` ```output symbol | open | high | low | close | volume --------+---------+---------+---------+---------+-------- AAPL | 133.445 | 133.445 | 133.44 | 133.445 | 30 AMZN | 95.225 | 95.225 | 95.225 | 95.225 | 1 INTC | 29.82 | 29.82 | 29.82 | 29.82 | MSFT | 235.5 | 235.5 | 235.5 | 235.5 | 100 PFE | 47.38 | 47.38 | 47.38 | 47.38 | 2 TSLA | 123.085 | 123.085 | 123.085 | 123.085 | ``` ================================================ FILE: tests/update/heartbeat.md 
================================================ # Heartbeat Tests ## Get uptime values from heartbeat data ```sql,creation,min-toolkit-version=1.15.0 CREATE TABLE liveness(heartbeat TIMESTAMPTZ, start TIMESTAMPTZ); INSERT INTO liveness VALUES ('01-01-2020 0:2:20 UTC', '01-01-2020 0:0 UTC'), ('01-01-2020 0:10 UTC', '01-01-2020 0:0 UTC'), ('01-01-2020 0:17 UTC', '01-01-2020 0:0 UTC'), ('01-01-2020 0:30 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:35 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:40 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:35 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:40 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:40 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 0:50:30 UTC', '01-01-2020 0:30 UTC'), ('01-01-2020 1:00:30 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:08 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:18 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:28 UTC', '01-01-2020 1:00 UTC'), ('01-01-2020 1:38:01 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:40 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:40:01 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:50:01 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:57 UTC', '01-01-2020 1:30 UTC'), ('01-01-2020 1:59:50 UTC', '01-01-2020 1:30 UTC'); CREATE MATERIALIZED VIEW hb AS SELECT start, heartbeat_agg(heartbeat, start, '30m', '10m') AS agg FROM liveness GROUP BY start; ``` ```sql,validation,min-toolkit-version=1.15.0 SELECT start, uptime(agg), interpolated_uptime(agg, LAG(agg) OVER (ORDER by start)) FROM hb ORDER BY start; ``` ```output start | uptime | interpolated_uptime ------------------------+----------+--------------------- 2020-01-01 00:00:00+00 | 00:24:40 | 00:24:40 2020-01-01 00:30:00+00 | 00:29:30 | 00:29:30 2020-01-01 01:00:00+00 | 00:29:30 | 00:30:00 2020-01-01 01:30:00+00 | 00:21:59 | 00:29:59 ``` ================================================ FILE: tests/update/original_update_tests.md ================================================ # Original Update Tests
```sql,creation,min-toolkit-version=1.4.0 CREATE TABLE test_data(ts timestamptz, val DOUBLE PRECISION); INSERT INTO test_data SELECT '2020-01-01 00:00:00+00'::timestamptz + i * '1 hour'::interval, 100 + i % 100 FROM generate_series(0, 10000) i; CREATE MATERIALIZED VIEW regression_view AS SELECT counter_agg(ts, val) AS countagg, hyperloglog(1024, val) AS hll, time_weight('locf', ts, val) AS twa, uddsketch(100, 0.001, val) as udd, tdigest(100, val) as tdig, stats_agg(val) as stats FROM test_data; ``` ```sql,validation,min-toolkit-version=1.4.0 SELECT num_resets(countagg), distinct_count(hll), average(twa), approx_percentile(0.1, udd), approx_percentile(0.1, tdig), kurtosis(stats) FROM regression_view; ``` ```output num_resets | distinct_count | average | approx_percentile | approx_percentile | kurtosis ------------+----------------+---------+--------------------+--------------------+-------------------- 100 | 100 | 149.5 | 108.96220333142547 | 109.50489521100047 | 1.7995661075080858 ``` ================================================ FILE: tests/update/state_agg.md ================================================ # `state_agg` tests ```sql,creation,min-toolkit-version=1.15.0 CREATE TABLE states_test(ts TIMESTAMPTZ, state TEXT); INSERT INTO states_test VALUES ('2020-01-01 00:00:00+00', 'START'), ('2020-01-01 00:00:11+00', 'OK'), ('2020-01-01 00:01:00+00', 'ERROR'), ('2020-01-01 00:01:03+00', 'OK'), ('2020-01-01 00:02:00+00', 'STOP'); CREATE TABLE agg(sa StateAgg); INSERT INTO agg SELECT state_agg(ts, state) FROM states_test; ``` ```sql,validation,min-toolkit-version=1.15.0 SELECT (state_timeline(sa)).* FROM agg; ``` ```output state | start_time | end_time -------+------------------------+------------------------ START | 2020-01-01 00:00:00+00 | 2020-01-01 00:00:11+00 OK | 2020-01-01 00:00:11+00 | 2020-01-01 00:01:00+00 ERROR | 2020-01-01 00:01:00+00 | 2020-01-01 00:01:03+00 OK | 2020-01-01 00:01:03+00 | 2020-01-01 00:02:00+00 STOP | 2020-01-01 00:02:00+00 | 
2020-01-01 00:02:00+00 ``` ================================================ FILE: tests/update/time-vector.md ================================================ # Time Vector Tests ```sql,creation CREATE TABLE time_vector_data(time TIMESTAMPTZ, value DOUBLE PRECISION); INSERT INTO time_vector_data VALUES ('2020-1-1 UTC', 30.0), ('2020-1-2 UTC', 45.0), ('2020-1-3 UTC', NULL), ('2020-1-4 UTC', 55.5), ('2020-1-5 UTC', 10.0); ``` ```sql,validation SELECT unnest(timevector(time,value))::TEXT FROM time_vector_data; ``` ```output unnest --------------------------------- ("2020-01-01 00:00:00+00",30) ("2020-01-02 00:00:00+00",45) ("2020-01-03 00:00:00+00",NaN) ("2020-01-04 00:00:00+00",55.5) ("2020-01-05 00:00:00+00",10) ``` ```sql,creation CREATE TABLE tv_rollup_data(time TIMESTAMPTZ, value DOUBLE PRECISION, bucket INTEGER); INSERT INTO tv_rollup_data VALUES ('2020-1-1 UTC', 30.0, 1), ('2020-1-2 UTC', 45.0, 1), ('2020-1-3 UTC', NULL, 2), ('2020-1-4 UTC', 55.5, 2), ('2020-1-5 UTC', 10.0, 3), ('2020-1-6 UTC', 13.0, 3), ('2020-1-7 UTC', 71.0, 4), ('2020-1-8 UTC', 0.0, 4); ``` ```sql,validation SELECT unnest(rollup(tvec))::TEXT FROM ( SELECT timevector(time, value) AS tvec FROM tv_rollup_data GROUP BY bucket ORDER BY bucket ) s; ``` ```output unnest ------------------------------- ("2020-01-01 00:00:00+00",30) ("2020-01-02 00:00:00+00",45) ("2020-01-03 00:00:00+00",NaN) ("2020-01-04 00:00:00+00",55.5) ("2020-01-05 00:00:00+00",10) ("2020-01-06 00:00:00+00",13) ("2020-01-07 00:00:00+00",71) ("2020-01-08 00:00:00+00",0) ``` ================================================ FILE: tests/update/time-weighted-average.md ================================================ # Time Weighted Average Tests ## Test integral and interpolated integral ```sql,creation,min-toolkit-version=1.15.0 CREATE TABLE time_weight_test(time timestamptz, value double precision, bucket timestamptz); INSERT INTO time_weight_test VALUES ('2020-1-1 8:00'::timestamptz, 10.0, '2020-1-1'::timestamptz), ('2020-1-1 
12:00'::timestamptz, 40.0, '2020-1-1'::timestamptz), ('2020-1-1 16:00'::timestamptz, 20.0, '2020-1-1'::timestamptz), ('2020-1-2 2:00'::timestamptz, 15.0, '2020-1-2'::timestamptz), ('2020-1-2 12:00'::timestamptz, 50.0, '2020-1-2'::timestamptz), ('2020-1-2 20:00'::timestamptz, 25.0, '2020-1-2'::timestamptz), ('2020-1-3 10:00'::timestamptz, 30.0, '2020-1-3'::timestamptz), ('2020-1-3 12:00'::timestamptz, 0.0, '2020-1-3'::timestamptz), ('2020-1-3 16:00'::timestamptz, 35.0, '2020-1-3'::timestamptz); CREATE MATERIALIZED VIEW twa AS ( SELECT bucket, time_weight('linear', time, value) as agg FROM time_weight_test GROUP BY bucket ); ``` ```sql,validation,min-toolkit-version=1.15.0 SELECT bucket, interpolated_integral( agg, bucket, '1 day'::interval, LAG(agg) OVER (ORDER BY bucket), LEAD(agg) OVER (ORDER BY bucket), 'hours') FROM twa ORDER BY bucket; ``` ```output bucket | interpolated_integral ------------------------+----------------------- 2020-01-01 00:00:00+00 | 364 2020-01-02 00:00:00+00 | 758.8571428571429 2020-01-03 00:00:00+00 | 382.1428571428571 ``` ```sql,validation,min-toolkit-version=1.15.0 SELECT bucket, integral(agg, 'hrs') FROM twa ORDER BY bucket; ``` ```output bucket | integral ------------------------+---------- 2020-01-01 00:00:00+00 | 220 2020-01-02 00:00:00+00 | 625 2020-01-03 00:00:00+00 | 100 ``` ================================================ FILE: tools/build ================================================ #!/bin/sh set -ex print() { printf '%s\n' "$*" } die() { st=${?:-0} if [ $st -eq 0 ]; then st=2 fi print "$*" >&2 exit $st } usage() { die 'build [ -n -pg1[234] -profile release ] ( test-crates | test-extension | install | test-doc | test-updates | clippy)' } require_pg_version() { [ -n "$pg_version" ] || die 'specify one of -pg15 | -pg16 | -pg17 | -pg18' } find_pg_config() { if [ -z "$pg_config" ]; then require_pg_version pg_config=`which $(sed -ne 's/"//g' -e "s/^pg$pg_version *= *//p" ~/.pgrx/config.toml)` fi [ -x "$pg_config" ] || die 
"$pg_config not executable" } require_cargo_pgrx() { [ -n "$cargo_pgrx" ] || die 'specify path to cargo-pgrx (0.4 series or newer)' } require_cargo_pgrx_old() { [ -n "$cargo_pgrx_old" ] || die 'specify path to cargo-pgrx (0.2-0.3 series)' } find_profile() { [ -n "$profile" ] || profile=dev } [ $# -ge 1 ] || usage # check versions cargo --version rustc --version rustup --version while [ $# -gt 0 ]; do arg="$1" shift case "$arg" in -n) nop=: ;; -pgconfig) pg_config="$1" shift ;; -cargo-pgrx) cargo_pgrx="$1" shift ;; -cargo-pgrx-old) cargo_pgrx_old="$1" shift ;; -pgport) pg_port="$1" shift ;; -pg1[0-9]) # If this script survives to postgresql 19, WE WIN! pg_version=${arg#-pg} pg=pg$pg_version [ -z "$pg_port" ] && pg_port=288$pg_version ;; -profile) profile="$1" shift ;; clippy) find_profile $nop cargo fetch $nop cargo clippy --profile $profile --workspace --features pg_test -- -D warnings ;; test-crates) # Should find no dependency crates to fetch. If it finds any, we need to update the cache key. 
find_profile $nop cargo fetch $nop cargo test --profile $profile --workspace --exclude timescaledb_toolkit ;; test-extension) cd extension find_profile require_pg_version $nop cargo fetch $nop cargo test --profile $profile --features "$pg pg_test" --no-default-features ;; install) find_profile require_pg_version find_pg_config (cd extension && $nop cargo pgrx install --profile $profile -c "$pg_config") $nop cargo run --manifest-path tools/post-install/Cargo.toml "$pg_config" ;; test-doc) find_profile require_pg_version $nop cargo pgrx start --package timescaledb_toolkit $pg || (cat /home/postgres/.pgrx/${pg_version}.log; false) $nop cargo run --profile $profile -p sql-doctester -- \ -h localhost \ -p $pg_port \ docs $nop cargo pgrx stop --package timescaledb_toolkit $pg ;; test-updates) find_profile require_pg_version find_pg_config require_cargo_pgrx require_cargo_pgrx_old $nop cargo pgrx start --package timescaledb_toolkit $pg || (cat /home/postgres/.pgrx/${pg_version}.log; false) $nop cargo run --profile $profile --manifest-path tools/update-tester/Cargo.toml -- full-update-test-source \ -h localhost \ -p $pg_port \ --cache old-versions \ "$pg_config" \ "$cargo_pgrx" \ "$cargo_pgrx_old" $nop cargo pgrx stop --package timescaledb_toolkit $pg ;; *) usage ;; esac done ================================================ FILE: tools/dependencies.sh ================================================ # Dependency configuration # Ideally, all dependencies would be specified in just one place. # Exceptions: # - crate dependencies are specified in Cargo.toml files # - postgres versions are duplicated in the Github Actions matrix # - Readme.md lists some, too. TODO is it acceptable to just point to this file? # All our automation scripts read this, so at least we're not duplicating this # information across all those. 
PG_VERSIONS='15 16 17 18' # TODO: extend this with 18 once TimescaleDB supports PostgreSQL 18 TSDB_PG_VERSIONS='15 16 17' CARGO_EDIT=0.11.2 # Keep synchronized with extension/Cargo.toml and `cargo install --version N.N.N cargo-pgrx` in Readme.md . PGRX_VERSION=0.16.1 RUST_TOOLCHAIN=1.89.0 RUST_PROFILE=minimal RUST_COMPONENTS=clippy,rustfmt # We use fpm 1.14.2 to build RPMs. # TODO Use rpmbuild directly. FPM_VERSION=1.14.2 GH_DEB_URL=https://github.com/cli/cli/releases/download/v2.16.1/gh_2.16.1_linux_amd64.deb GH_DEB_SHA256=d0ba8693b6e4c1bde6683ccfa971a15c00b9fe92865074d48609959d04399dc7 ================================================ FILE: tools/install-timescaledb ================================================ #!/bin/sh git clone "$2" timescaledb cd timescaledb git switch --detach "$3" mkdir build cd build # this overwrites the files from the TimescaleDB package cmake .. -DUSE_OPENSSL=0 -DLINTER=0 -DCMAKE_PROGRAM_PATH=/usr/lib/postgresql/$1/bin make install ================================================ FILE: tools/post-install/Cargo.toml ================================================ [package] name = "post-install" version = "0.1.0" edition = "2021" [dependencies] xshell = "0.1.17" walkdir = "2" ================================================ FILE: tools/post-install/src/main.rs ================================================ #![allow(unexpected_cfgs)] use std::{ env, fs::{self, File}, io::{BufRead, BufReader, BufWriter, Write}, path::{Path, PathBuf}, process, }; use xshell::cmd; mod update_script; macro_rules!
path { ($start:ident $(/ $segment: literal)*) => { { let root: &Path = $start.as_ref(); root $(.join($segment))* } }; ($start:ident / $segment: expr) => { { let root: &Path = $start.as_ref(); root.join($segment) } } } fn main() { if let Err(err) = try_main() { eprintln!("{err}"); process::exit(1); } } fn try_main() -> xshell::Result<()> { let pg_config = env::args().nth(1).expect("missing /path/to/pg_config"); let extension_info = if pg_config == "--dir" { let package_dir = env::args().nth(2).expect("missing /path/to/package_dir"); get_extension_info_from_dir(&package_dir)? } else { get_extension_info_from_pg_config(&pg_config)? }; // remove `module_path = '$libdir/timescaledb_toolkit'` // from timescaledb_toolkit.control. // Not needed for correctness purposes, but it ensures that if `MODULE_PATH` // is left anywhere in the install script, it will fail to install. remove_module_path_from_control_file(&extension_info); // rename timescaledb_toolkit.so to timescaledb_toolkit-.so add_version_to_binary(&extension_info); // replace `MODULE_PATH` with `$libdir/timescaledb_toolkit-` add_version_to_install_script(&extension_info); generate_update_scripts(&extension_info); Ok(()) } struct ExtensionInfo { control_file: PathBuf, current_version: String, upgradeable_from: Vec, bin_dir: PathBuf, extension_dir: PathBuf, } fn get_extension_info_from_pg_config(pg_config: &str) -> xshell::Result { let bin_dir = cmd!("{pg_config} --pkglibdir").read()?; let share_dir = cmd!("{pg_config} --sharedir").read()?; let extension_dir = path!(share_dir / "extension"); let control_file = path!(extension_dir / "timescaledb_toolkit.control"); let control_contents = fs::read_to_string(&control_file).unwrap_or_else(|e| { panic!( "cannot read control file {} due to {e}", control_file.to_string_lossy() ) }); let current_version = get_current_version(&control_contents); eprintln!("Generating Version {current_version}"); let upgradeable_from = get_upgradeable_from(&control_contents); 
eprintln!("Upgradable From {upgradeable_from:?}"); let extension_info = ExtensionInfo { control_file, current_version, upgradeable_from, bin_dir: bin_dir.into(), extension_dir, }; Ok(extension_info) } fn get_extension_info_from_dir(root: &str) -> xshell::Result { use std::ffi::OsStr; let walker = walkdir::WalkDir::new(root).contents_first(true); let mut extension_info = None; let mut bin_dir = None; for entry in walker { let entry = entry.unwrap(); if entry.file_type().is_file() { let path = entry.into_path(); if path.extension() == Some(OsStr::new("control")) { // found the control file let extension_dir = path .parent() .expect("control file not in dir") .to_path_buf(); extension_info = Some((extension_dir, path)); } else if path.extension() == Some(OsStr::new("so")) { // found the binary bin_dir = Some(path.parent().expect("binary file not in dir").to_path_buf()); } if extension_info.is_some() && bin_dir.is_some() { break; } } } if bin_dir.is_none() || extension_info.is_none() { panic!("could not find extension objects") } let bin_dir = bin_dir.unwrap(); let (extension_dir, control_file) = extension_info.unwrap(); let control_contents = fs::read_to_string(&control_file).unwrap_or_else(|e| { panic!( "cannot read control file {} due to {e}", control_file.to_string_lossy() ) }); let current_version = get_current_version(&control_contents); eprintln!("Generating Version {current_version}"); let upgradeable_from = get_upgradeable_from(&control_contents); eprintln!("Upgradable From {upgradeable_from:?}"); let extension_info = ExtensionInfo { control_file, current_version, upgradeable_from, bin_dir, extension_dir, }; Ok(extension_info) } fn get_current_version(control_contents: &str) -> String { get_field_val(control_contents, "default_version").to_string() } fn get_upgradeable_from(control_contents: &str) -> Vec { // versions is a comma-delimited list of versions let versions = get_field_val(control_contents, "upgradeable_from"); versions .split_terminator(',') 
.map(|version| version.trim().to_string()) .collect() } fn remove_module_path_from_control_file(ExtensionInfo { control_file, .. }: &ExtensionInfo) { let tmp_file = control_file.with_extension("control.tmp"); transform_file_to(control_file, &tmp_file, |line| { if line.starts_with("module_pathname") { return "".to_string(); } line }); rename_file(tmp_file, control_file); } fn add_version_to_binary( ExtensionInfo { current_version, bin_dir, .. }: &ExtensionInfo, ) { let bin_file = path!(bin_dir / "timescaledb_toolkit.so"); let versioned_file = path!(bin_dir / format!("timescaledb_toolkit-{}.so", current_version)); rename_file(bin_file, versioned_file); } fn add_version_to_install_script( ExtensionInfo { current_version, extension_dir, .. }: &ExtensionInfo, ) { let install_script = path!(extension_dir / format!("timescaledb_toolkit--{current_version}.sql")); let versioned_script = install_script.with_extension("sql.tmp"); let module_path = format!("$libdir/timescaledb_toolkit-{current_version}"); transform_file_to(&install_script, &versioned_script, |line| { assert!( !line.contains("CREATE OR REPLACE FUNCTION"), "pgrx should not generate CREATE OR REPLACE in functions" ); if line.contains("MODULE_PATHNAME") { return line.replace("MODULE_PATHNAME", &module_path); } line }); rename_file(&versioned_script, &install_script); } // // upgrade scripts // fn generate_update_scripts( ExtensionInfo { current_version, upgradeable_from, extension_dir, .. 
}: &ExtensionInfo, ) { let extension_path = path!(extension_dir / format!("timescaledb_toolkit--{}.sql", current_version)); for from_version in upgradeable_from { let mut extension_file = open_file(&extension_path); let upgrade_path = path!( extension_dir / format!( "timescaledb_toolkit--{from}--{to}.sql", from = from_version, to = current_version ) ); let mut upgrade_file = create_file(&upgrade_path); update_script::generate_from_install( from_version, current_version, &mut extension_file, &mut upgrade_file, ); copy_permissions(extension_file, upgrade_file); } } trait PushLine { fn push_line(&mut self, line: &str); } impl PushLine for String { fn push_line(&mut self, line: &str) { self.push_str(line); self.push('\n'); } } // // control file utils // // find a ` = ''` and extract `` fn get_field_val<'a>(contents: &'a str, field: &str) -> &'a str { contents .lines() .filter(|line| line.contains(field)) .map(get_quoted_field) .next() .unwrap_or_else(|| panic!("cannot read field `{field}` in control file")) } // given a ` = ''` extract `` fn get_quoted_field(line: &str) -> &str { let quoted = line .split('=') .nth(1) .unwrap_or_else(|| panic!("cannot find value in line `{line}`")); quoted .trim_start() .split_terminator('\'') .find(|s| !s.is_empty()) .unwrap_or_else(|| panic!("unquoted value in line `{line}`")) } // // file utils // fn open_file(path: impl AsRef) -> BufReader { let path = path.as_ref(); let file = File::open(path) .unwrap_or_else(|e| panic!("cannot open file `{}` due to {e}", path.to_string_lossy())); BufReader::new(file) } fn create_file(path: impl AsRef) -> BufWriter { let path = path.as_ref(); let file = File::create(path) .unwrap_or_else(|e| panic!("cannot create file `{}` due to {e}", path.to_string_lossy())); BufWriter::new(file) } fn rename_file(from: impl AsRef, to: impl AsRef) { let from = from.as_ref(); let to = to.as_ref(); fs::rename(from, to).unwrap_or_else(|e| { panic!( "cannot rename `{}` to `{}` due to `{e}`", from.to_string_lossy(), 
to.to_string_lossy() ) }); } fn transform_file_to( from: impl AsRef, to: impl AsRef, mut transform: impl FnMut(String) -> String, ) { let to_path = to.as_ref(); let mut to = create_file(to_path); let from_path = from.as_ref(); let mut from = open_file(from_path); for line in (&mut from).lines() { let line = line .unwrap_or_else(|e| panic!("cannot read `{}` due to {e}", from_path.to_string_lossy())); writeln!(&mut to, "{}", transform(line)).unwrap_or_else(|e| { panic!("cannot write to `{}` due to {e}", to_path.to_string_lossy()) }); } copy_permissions(from, to); } fn copy_permissions(from: BufReader, to: BufWriter) { let permissions = from.into_inner().metadata().unwrap().permissions(); to.into_inner() .unwrap() .set_permissions(permissions) .unwrap(); } ================================================ FILE: tools/post-install/src/update_script.rs ================================================ use std::{ collections::HashSet, io::{BufRead, Write}, iter::Peekable, }; use crate::PushLine; static ALTERABLE_PROPERTIES: [&str; 6] = [ "RECEIVE", "SEND", "TYPMOD_IN", "TYPMOD_OUT", "ANALYZE", "STORAGE", ]; #[path = "../../../extension/src/stabilization_info.rs"] mod stabilization_info; // our update script is a copy of the install script with the following changes // 1. we drop the experimental schema so everything inside it is dropped. // 2. drop the event triggers in case we're coming from a version that had them // 3. for all CREATEs we check if the object is new in `current_version` // a. if it is, we output the CREATE as-is // b. 
if it's not, we output the equivalent REPLACE, if one is needed pub(crate) fn generate_from_install( from_version: &str, current_version: &str, extension_file: impl BufRead, mut upgrade_file: impl Write, ) { let new_stabilizations = new_stabilizations(from_version, current_version); writeln!( &mut upgrade_file, "DROP SCHEMA IF EXISTS toolkit_experimental CASCADE;\n\ -- drop the EVENT TRIGGERs; there's no CREATE OR REPLACE for those DROP EVENT TRIGGER IF EXISTS disallow_experimental_deps CASCADE;\n\ DROP EVENT TRIGGER IF EXISTS disallow_experimental_dependencies_on_views CASCADE;\n\ DROP FUNCTION IF EXISTS disallow_experimental_dependencies();\n\ DROP FUNCTION IF EXISTS disallow_experimental_view_dependencies();\n\ DROP FUNCTION IF EXISTS timescaledb_toolkit_probe;" ) .unwrap(); let lines = extension_file .lines() .map(|line| line.expect("cannot read install script")) .peekable(); let mut script_creator = UpdateScriptCreator { lines, upgrade_file, new_stabilizations, }; while script_creator.has_pending_input() { let create = script_creator.find_create(); match create { Some(Create::Function(create)) => { script_creator.handle_create_functionlike(FunctionLike::Fn, create) } Some(Create::Aggregate(create)) => { script_creator.handle_create_functionlike(FunctionLike::Agg, create) } Some(Create::Type(create)) => script_creator.handle_create_type(create), Some(Create::Schema(create)) => { // TODO is there something more principled to do here? 
writeln!(script_creator.upgrade_file, "CREATE SCHEMA {create}").unwrap(); } Some(Create::Operator(create)) => script_creator.handle_create_operator(create), Some(Create::Cast(create)) => { // TODO we don't have a stable one of these yet // JOSH - we should probably check if the FUNCTION is experimental also if create.contains("toolkit_experimental.") || create.starts_with("(tests.") { writeln!(script_creator.upgrade_file, "CREATE CAST {create}").unwrap(); continue; } unimplemented!("unprepared for stable CAST: {create}") } None => continue, } } } struct UpdateScriptCreator where Lines: Iterator, Dst: Write, { lines: Peekable, upgrade_file: Dst, new_stabilizations: StabilizationInfo, } enum Create { Function(String), Aggregate(String), Type(String), Operator(String), Schema(String), Cast(String), } const MUST_FIND_MATCH: bool = false; const ALLOW_NO_MATCH: bool = true; impl UpdateScriptCreator where Lines: Iterator, Dst: Write, { fn has_pending_input(&mut self) -> bool { self.lines.peek().is_some() } // find a `CREATE ` and return the `` fn find_create(&mut self) -> Option { for line in &mut self.lines { // search for `CREATE FUNCTION/TYPE/OPERATOR ;` let trimmed = line.trim_start(); if let Some(created) = trimmed.strip_prefix("CREATE ") { let l = created.trim_start(); let create = match_start( l, [ ("FUNCTION", &mut |l| Create::Function(l.to_string())), ("AGGREGATE", &mut |l| Create::Aggregate(l.to_string())), ("TYPE", &mut |l| Create::Type(l.to_string())), ("OPERATOR", &mut |l| Create::Operator(l.to_string())), ("SCHEMA", &mut |l| Create::Schema(l.to_string())), ("CAST", &mut |l| Create::Cast(l.to_string())), ], ); if create.is_some() { return create; } unreachable!("unexpected CREATE `{trimmed}`") } writeln!(self.upgrade_file, "{line}").unwrap(); } return None; // find which of a number of matchers a str starts with, and return the // rest. 
In other words, if find the first matcher matcher such that the // str is ` ` and return the `` #[allow(clippy::type_complexity)] fn match_start( line: &str, matchers: [(&str, &mut dyn FnMut(&str) -> T); N], ) -> Option { for (matcher, constructor) in matchers { if let Some(line) = line.strip_prefix(matcher) { let line = line.trim_start(); return Some(constructor(line)); } } None } } // handle a function-like create: if the function or aggregate is new in this // version use `CREATE FUNCTION/AGGREGATE` to create the function, otherwise use // `CREATE OR REPLACE` to update it to the newest version fn handle_create_functionlike(&mut self, is_function: FunctionLike, mut create: String) { if create.starts_with("toolkit_experimental") || create.starts_with("tests") { writeln!(self.upgrade_file, "{} {}", is_function.create(), create).unwrap(); return; } if !create.contains(')') { // look for the end of the argument list create.push('\n'); for line in &mut self.lines { create.push_line(&line); if line.contains(')') { break; } } } self.write_create_functionlike(is_function, &create); } fn write_create_functionlike(&mut self, is_function: FunctionLike, create_stmt: &str) { // parse a function or aggregate // it should look something like // ``` // ""("" ,*) ... 
// ``` let (name, rem) = parse_ident(create_stmt); let types = parse_arg_types(rem); let function = Function { name, types }; // write if self.new_stabilizations.new_functions.contains(&function) { writeln!( self.upgrade_file, "{} {}", is_function.create(), create_stmt ) .expect("cannot write create function") } else { writeln!( self.upgrade_file, "{} {}", is_function.create_or_replace(), create_stmt ) .expect("cannot write create or replace function") } } fn handle_create_type(&mut self, create: String) { let type_name = extract_name(&create); if type_name.starts_with("toolkit_experimental") || type_name.starts_with("tests") { writeln!(self.upgrade_file, "CREATE TYPE {create}").unwrap(); return; } if self.new_stabilizations.new_types.contains(&type_name) { writeln!(self.upgrade_file, "CREATE TYPE {create}").unwrap(); return; } if create.trim_end().ends_with(';') { // found `CREATE TYPE ;` we skip this in update scripts } else if create.trim_end().ends_with('(') { // found // ``` // CREATE TYPE ( // ... // ); // ``` // alter the type to match the new properties let alters = self.get_alterable_properties(); self.write_alter_type(&type_name, &alters); } else { unreachable!() } } fn get_alterable_properties(&mut self) -> Vec> { self.get_properties(&ALTERABLE_PROPERTIES[..], ALLOW_NO_MATCH); // Should return alters here, except PG12 doesn't allow alterations to type properties. 
// Once we no longer support PG12 change this back to returning alters vec![] } fn write_alter_type(&mut self, type_name: &str, alters: &[Option]) { let mut alter_statement = String::new(); for (i, alter) in alters.iter().enumerate() { use std::fmt::Write; let value = match alter { None => continue, Some(value) => value, }; if alter_statement.is_empty() { write!(&mut alter_statement, "ALTER TYPE {type_name} SET (") .expect("cannot write ALTER"); } else { alter_statement.push_str(", "); } write!( &mut alter_statement, "{} = {value}", ALTERABLE_PROPERTIES[i] ) .expect("cannot write ALTER"); } if !alter_statement.is_empty() { alter_statement.push_str(");"); } writeln!(self.upgrade_file, "{alter_statement}").expect("cannot write ALTER TYPE"); } fn handle_create_operator(&mut self, create: String) { assert!(create.trim_end().ends_with('(')); // found // ``` // CREATE OPERATOR ( // PROCEDURE=..., // LEFTARG=..., // RIGHTARG=... // ); // ``` // if any of `PROCEDURE`, `LEFTARG`, or `RIGHTARG` refer to and // experimental object the operator is experimental, otherwise it isn't let op = extract_name(&create); let fields = self.get_properties(&["PROCEDURE", "LEFTARG", "RIGHTARG"], MUST_FIND_MATCH); let is_experimental = fields .iter() .filter_map(|f| f.as_ref()) .any(|f| f.contains("toolkit_experimental")); let parse_operator_arg_type = |field: &Option| { field .as_ref() .unwrap() // remove everything after the comma, if one exists .split_terminator(',') .next() .unwrap() // remove any trailing comments .split_terminator("/*") .next() .unwrap() .to_ascii_lowercase() // handle `DOUBLE PRECISION` .split_ascii_whitespace() .map(|s| s.to_string()) .collect() }; let operator = Function { name: op.clone(), types: vec![ parse_operator_arg_type(&fields[1]), parse_operator_arg_type(&fields[2]), ], }; if is_experimental || self.new_stabilizations.new_operators.contains(&operator) { writeln!( self.upgrade_file, "CREATE OPERATOR {} (\n \ PROCEDURE={}\n \ LEFTARG={}\n \ RIGHTARG={}\n \ 
);", op, fields[0].as_ref().unwrap(), fields[1].as_ref().unwrap(), fields[2].as_ref().unwrap(), ) .expect("cannot write CREATE OPERATOR") } } fn get_properties(&mut self, fields: &[&str], allow_no_match: bool) -> Vec> { let mut properties = vec![None; fields.len()]; for line in &mut self.lines { // found `)` means we're done with // ``` // CREATE ( // ... // ); // ``` if line.trim_start().starts_with(')') { break; } let mut split = line.split('='); let field = split.next().unwrap().trim(); let value = split .next() .unwrap_or_else(|| panic!("no value for field {field}")) .trim(); assert_eq!(split.next(), None); let mut found_match = false; for (i, property) in fields.iter().enumerate() { if field.eq_ignore_ascii_case(property) { properties[i] = Some(value.to_string()); found_match = true; } } if !found_match && !allow_no_match { panic!("{field} is not considered an acceptable property for this object") } } properties } } enum FunctionLike { Fn, Agg, } impl FunctionLike { fn create(&self) -> &'static str { match self { FunctionLike::Fn => "CREATE FUNCTION", FunctionLike::Agg => "CREATE AGGREGATE", } } fn create_or_replace(&self) -> &'static str { match self { FunctionLike::Fn => "CREATE OR REPLACE FUNCTION", FunctionLike::Agg => "CREATE OR REPLACE AGGREGATE", } } } fn parse_arg_types(stmt: &str) -> Vec> { // extract the types from a // `( ,* )` // with arbitrary interior whitespace and comments into a // `Vec>` let stmt = stmt.trim_start(); assert!(stmt.starts_with('('), "stmt.starts_with('(') {stmt}"); let end = stmt.find(')').expect("cannot find ')' for arg list"); let args = &stmt[1..end]; let mut types = vec![]; // TODO strip out comments for arg in args.split_terminator(',') { let ty = arg .split_whitespace() .filter(remove_block_comments()) // skip any block comments .skip(1) // skip the identifier at the start .take_while(|s| !s.starts_with("--")) // skip any line comments .map(|s| s.to_ascii_lowercase()) .collect(); types.push(ty) } return types; fn 
remove_block_comments() -> impl FnMut(&&str) -> bool {
        // Stateful filter: drops every whitespace-delimited token that lies
        // inside a `/* ... */` block comment, including the delimiters.
        let mut keep = true;
        move |s| match *s {
            "*/" => {
                // keep an unmatched `*/`, drop one that closes a comment
                let ret = keep;
                keep = true;
                ret
            }
            "/*" => {
                keep = false;
                false
            }
            _ => keep,
        }
    }
}

/// Parse a leading `<ident>` or quoted `"<ident>"` off the front of `stmt`,
/// returning the identifier and the remaining, unparsed text.
fn parse_ident(mut stmt: &str) -> (String, &str) {
    let quoted = stmt.starts_with('"');
    if quoted {
        stmt = &stmt[1..];
        let end = stmt.find('"').expect("cannot find closing quote");
        let ident = stmt[..end].to_string();
        (ident, &stmt[end + 1..])
    } else {
        let end = stmt
            .find(|c| !(char::is_alphanumeric(c) || c == '_'))
            .expect("cannot find end of ident");
        let ident = stmt[..end].to_string();
        (ident, &stmt[end..])
    }
}

/// First whitespace-delimited word of `line`, with any trailing `;` removed,
/// lowercased (SQL identifiers are compared case-insensitively here).
fn extract_name(line: &str) -> String {
    let mut name: &str = line.split_ascii_whitespace().next().expect("no type name");
    if let Some(stripped) = name.strip_suffix(';') {
        name = stripped;
    }
    name.to_ascii_lowercase()
}

/// Objects newly stabilized between two versions; used to decide between
/// `CREATE` (object is new) and `CREATE OR REPLACE`/`ALTER` (pre-existing).
#[derive(Debug)]
#[allow(dead_code)]
pub(crate) struct StabilizationInfo {
    pub new_functions: HashSet<Function>,
    pub new_types: HashSet<String>,
    pub new_operators: HashSet<Function>,
}

pub(crate) fn new_stabilizations(from_version: &str, to_version: &str) -> StabilizationInfo {
    StabilizationInfo {
        new_functions: stabilization_info::STABLE_FUNCTIONS(from_version, to_version),
        new_types: stabilization_info::STABLE_TYPES(from_version, to_version),
        new_operators: stabilization_info::STABLE_OPERATORS(from_version, to_version),
    }
}

/// A function/operator identified by name plus argument types; each argument
/// type is the list of its lowercased words (e.g. `["double", "precision"]`).
#[derive(Hash, Clone, PartialEq, Eq, Debug)]
pub(crate) struct Function {
    name: String,
    types: Vec<Vec<String>>,
}

/// `Function` analogue built from `static` data by the stabilization macros.
#[derive(Debug)]
pub(crate) struct StaticFunction {
    name: &'static str,
    types: &'static [&'static [&'static str]],
}

#[derive(Eq, PartialEq, Ord, PartialOrd, Debug)]
struct Version {
    major: u64,
    minor: u64,
    patch: u64,
}

/// Parse `major.minor[.patch]`, tolerating a `-dev` suffix on the patch
/// component and defaulting a missing patch to 0. Panics on malformed input.
fn version(s: &str) -> Version {
    let mut nums = s.split('.');
    let version = Version {
        major: nums
            .next()
            .unwrap_or_else(|| panic!("no major version in `{s}`"))
            .parse()
            .unwrap_or_else(|e| panic!("error {e} for major version in `{s}`")),
        minor: nums
            .next()
            .unwrap_or_else(|| panic!("no minor version in `{s}`"))
            .parse()
            .unwrap_or_else(|e| panic!("error {e} for minor version in `{s}`")),
        patch: nums
            .next()
            .unwrap_or("0")
            .trim_end_matches("-dev")
            .parse()
            // BUGFIX: this message previously said "major version"
            .unwrap_or_else(|e| panic!("error {e} for patch version in `{s}`")),
    };
    if nums.next().is_some() {
        panic!("extra `.`s in `{s}`")
    }
    version
}

/// Iterate the entries of `stabilizations` whose version lies in the
/// half-open range `(from_version, to_version]`. Entries are expected to be
/// sorted newest-first and terminated by a `"prehistory"` sentinel.
fn new_objects<'a, T: std::fmt::Debug>(
    stabilizations: &'a [(&'a str, T)],
    from_version: &'a str,
    to_version: &'a str,
) -> impl Iterator<Item = &'a (&'a str, T)> + 'a {
    let to_version = to_version.trim_end_matches("-dev");
    let from_version = version(from_version);
    let to_version = version(to_version);
    stabilizations
        .iter()
        .skip_while(move |(version_str, _)| {
            let version = version(version_str);
            version > to_version
        })
        .take_while(move |(at, _)| at != &"prehistory" && version(at) > from_version)
}

/// Generates `fn $export_symbol(from, to) -> HashSet<Function>` returning the
/// functions stabilized in versions within `(from, to]`.
#[macro_export]
macro_rules! functions_stabilized_at {
    (
        $export_symbol: ident
        $(
            $version: literal => {
                $($fn_name: ident ( $( $($fn_type: ident)+ ),* ) ),* $(,)?
            }
        )*
    ) => {
        #[allow(non_snake_case)]
        pub(crate) fn $export_symbol(from_version: &str, to_version: &str) -> super::HashSet<Function> {
            use super::*;
            static STABILIZATIONS: &[(&str, &[StaticFunction])] = &[
                $(
                    (
                        $version,
                        &[
                            $(StaticFunction {
                                name: stringify!($fn_name),
                                types: &[$( &[$( stringify!($fn_type), )*], )*],
                            },)*
                        ],
                    ),
                )*
            ];
            new_objects(STABILIZATIONS, from_version, to_version)
                .flat_map(|(_, creates)|
                    creates.into_iter().map(|StaticFunction { name, types }|
                        Function {
                            name: name.to_ascii_lowercase(),
                            types: types.into_iter().map(|v|
                                v.into_iter().map(|s| s.to_ascii_lowercase()).collect()
                            ).collect(),
                        }
                    )
                )
                .collect()
        }
    };
}

/// Generates `fn $export_symbol(from, to) -> HashSet<String>` returning the
/// type names stabilized in versions within `(from, to]`.
#[macro_export]
macro_rules! types_stabilized_at {
    (
        $export_symbol: ident
        $(
            $version: literal => {
                $($type_name: ident),* $(,)?
} )* ) => { #[allow(non_snake_case)] pub(crate) fn $export_symbol(from_version: &str, to_version: &str) -> super::HashSet { use super::*; static STABILIZATIONS: &[(&str, &[&str])] = &[ $( ( $version, &[ $(stringify!($type_name),)* ], ), )* ]; new_objects(STABILIZATIONS, from_version, to_version) .flat_map(|(_, creates)| creates.into_iter().map(|t| t.to_ascii_lowercase()) ) .collect() } }; } #[macro_export] macro_rules! operators_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($operator_name: literal ( $( $($fn_type: ident)+ ),* ) ),* $(,)? } )* ) => { #[allow(non_snake_case)] pub(crate) fn $export_symbol(from_version: &str, to_version: &str) -> super::HashSet { use super::*; static STABILIZATIONS: &[(&str, &[StaticFunction])] = &[ $( ( $version, &[ $(StaticFunction { name: $operator_name, types: &[$( &[$( stringify!($fn_type), )*], )*], },)* ], ), )* ]; new_objects(STABILIZATIONS, from_version, to_version) .flat_map(|(_, creates)| creates.into_iter().map(|StaticFunction { name, types }| Function { name: name.to_ascii_lowercase(), types: types.into_iter().map(|v| v.into_iter().map(|s| s.to_ascii_lowercase()).collect() ).collect(), }) ) .collect() } }; } ================================================ FILE: tools/release ================================================ #!/bin/sh # This script automates release creation: # 1. Create release branch from target commit. # 1a. Validate contents of target commit (just upgradeable_from currently). # 2. Set toolkit version on branch. # 3. Run tests. # 4. Push (if -push) the branch so release-build-scripts repository [1] can see the commit from #2. # 5. Trigger (if -push) toolkit packaging actions in release-build-scripts repository. # 6. Tag the release (and push, if -push). [2] # 7. Prepare the main branch for the next release cycle. # 7a. Update upgradeable_form in control file. # 7b. Set toolkit version to released version with '-dev' appended. # 7c. Update Changelog.md . # 7d. 
Push to and create pull request for post-$VERSION branch (if -push). # 8. File issue for release tasks that are not yet automated (if -push). # [1] We need a self-hosted runner for arm64 build, which we can only get with # a private repository, so we must delegate packaging to that. # [2] This means we publish a tag before testing binaries. We'd rather test first. # TODO How? # - Can we have release-build-scripts gh back to an action over here? # - Can we have a trigger that watches for release-build-scripts action to finish? # Sample run: # tools/release -n -push -version 1.11.0 9c2b04d # git commit records these on commits (yes, all three). # TODO What should we use? I pulled this from the deb package metadata EMAIL=hello@timescale.com GIT_AUTHOR_NAME=tools/release GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME export EMAIL GIT_AUTHOR_NAME GIT_COMMITTER_NAME MAIN_BRANCH=main BRANCH_BASENAME=forge-stable- CONTROL=extension/timescaledb_toolkit.control TOML=extension/Cargo.toml UPGRADEABLE_FROM_RE="^# upgradeable_from = '[^']*'\$" NEXT_RELEASE_RE='^## Next Release (Date TBD)' . tools/dependencies.sh set -ex # TODO Install these into timescaledev/toolkit-builder image and delete this block. if [ "$1" = setup ]; then # Install cargo set-version (and cargo install is not idempotent). if ! cargo help set-version > /dev/null; then cargo install --version =$CARGO_EDIT cargo-edit fi # Install gh gh=`basename $GH_DEB_URL` curl --fail -LO $GH_DEB_URL sha256sum -c - <&2 exit $st } usage() { die 'release [-n] [-push] -version VERSION COMMIT' } # Return 0 iff working directory is clean. # Also prints any diff. assert_clean() { $nop git diff --exit-code } # Return 0 iff working directory is dirty. # Also prints any diff. assert_dirty() { [ -n "$nop" ] && return ! assert_clean } # Use start_commit, commit, and finish_commit to safely build a commit from # multiple automated edits. # - start_commit [file names] # Start a commit with the named changed files. 
# Any other edited file (dirty directory after commit) is an error. # - commit [file names] # Amend the commit after each automated edit. # Any other edited file (dirty directory after commit) is an error. # - finish_commit MESSAGE # Finalize the commit with the commit message MESSAGE. # Any edited files is an error. start_commit() { [ -z "$_PENDING_COMMIT" ] || die 'BUG: start_commit called twice' _PENDING_COMMIT=1 $nop git add "$@" $nop git commit -m pending assert_clean || die "working directory should be clean after commit $@" } commit() { [ -n "$_PENDING_COMMIT" ] || die 'BUG: commit called without start_commit' $nop git add "$@" $nop git commit --no-edit --amend assert_clean || die "working directory should be clean after commit $@" } finish_commit() { [ -n "$_PENDING_COMMIT" ] || die 'BUG: finish_commit called without start_commit' assert_clean || die "working directory should be clean to finish commit '$1'" _PENDING_COMMIT= (export GIT_COMMITTER_DATE="`date`" && $nop git commit --no-edit --amend "--date=$GIT_COMMITTER_DATE" -m "$1") } # Return 0 if this is a minor release (i.e. $PATCH is greater than zero). release_is_minor() { [ $PATCH -eq 0 ] } # Super simple option processing. push=false while [ $# -gt 0 ]; do arg=$1 shift case "$arg" in -n) dry_run_flag=--dry-run nop=: ;; # TODO Remove -y alias for -push . -push | -y) push=true ;; -version) VERSION=$1 shift COMMIT=$1 shift ;; *) usage ;; esac done [ -n "$VERSION" ] && [ -n "$COMMIT" ] || usage # And away we go! MAJOR=${VERSION%%.*} minpat=${VERSION#*.} MINOR=${minpat%.*} PATCH=${minpat#*.} POST_REL_BRANCH=post-$VERSION # 0. Sanity-check the surroundings. # working directory clean? assert_clean || die 'cowardly refusing to operate on dirty working directory' # 1. Create release branch from target commit. branch="$BRANCH_BASENAME"$VERSION $nop git checkout -b $branch $COMMIT # Sanity-check the branch contents. # control file matches expectations? 
count=`grep -c "$UPGRADEABLE_FROM_RE" $CONTROL` || die "upgradeable_from line malformed" if [ "$count" -ne 1 ]; then print >&2 "too many upgradeable_from lines matched:" grep >&2 "$UPGRADEABLE_FROM_RE" $CONTROL die fi # If we forget to update the Changelog (or forget to cherry-pick Changelog # updates), show a clear error message rather than letting the ed script fail # mysteriously. grep -qs "$NEXT_RELEASE_RE" Changelog.md || die 'Changelod.md lacks "Next Release" section' # 1a. Validate contents of target commit (just upgradeable_from currently). if ! release_is_minor; then # Releasing e.g. 1.13.2 - this one might be a cherry-pick, so we need to ensure upgradeable from 1.13.1 . # It is conceivable that we could intend to release 1.17.1 without # allowing upgrade from 1.17.0, but we can cross that bridge if we come # to it. prev=$MAJOR.$MINOR.$(( PATCH - 1 )) # The set of lines matching this pattern is a subset of the set required in preflight above. grep -Eqs "^# upgradeable_from = '[^']*,?$prev[,']" $CONTROL || die "$prev missing from upgradeable_from " fi # Else releasing e.g. 1.13.0 - these are never cherrypicks and we automatically set upgradeable_from on main. # 2. Set toolkit version. cargo set-version $dry_run_flag -p timescaledb_toolkit $VERSION assert_dirty || die "failed to set toolkit version to $VERSION in $TOML" start_commit $TOML # Update cargo.lock - this form of cargo update doesn't update dependency versions. $nop cargo update -p timescaledb_toolkit assert_dirty || die "failed to set toolkit version to $VERSION in Cargo.lock" commit Cargo.lock # Update Changelog.md . branch_commit_date=`git log -1 --pretty=format:%as $branch_commit` $nop ed Changelog.md < release-notes Changelog.md < FLAGS: --help Prints help information -V, --version Prints version information OPTIONS: -d, --database postgres database the root connection should use. By default this DB will only be used to spawn the individual test databases; no tests will run against it. 
-h, --host postgres host -a, --password postgres password -p, --port postgres port -f, --startup-file File containing SQL commands that should be run when each test database is created. -s, --startup-script SQL command that should be run when each test database is created. -u, --user postgres user ARGS: Path in which to search for tests ``` ## Formatting ## The tool looks through every markdown file in the provided path for SQL code blocks like ```SQL SELECT column_1, column_2, etc FROM foo ``` and will try to run them. The SQL is assumed to be followed with an `output` block like which contains the expected output for the command ```output column 1 | column 2 | etc ----------+----------+----- value 1 | value 1 | etc ``` Only the actual values are checked; the header, along with leading and trailing whitespace are ignored. If no `output` is provided the tester will validate that the output should be empty. Output validation can be suppressed by adding `ignore-output` after the `SQL` tag, like so ```SQL,ignore-output SELECT non_validated FROM foo ``` in which case the SQL will be run, and its output ignored. SQL code blocks can be skipped entirely be adding `ignore` after the tag as in ```SQL,ignore This never runs, so it doesn't matter if it's valid SQL ``` By default, each code block is run in its own transaction, which is rolled back after the command completes. If you want to run outside a transaction, because you're running commands that cannot be run within a transaction, or because you want to change global state, you can mark a block as non-transactional like so ```SQL,non-transactional CREATE TABLE bar(); ``` Every file is run in its own database, so such commands can only affect the remainder of the current file. This tag can be combined with any of the others. The tool supports adding startup scripts that are run first on every new database. This can be useful for repetitive initialization tasks, like CREATEing extensions that must be done for every file. 
For file-specific initialization, you can use `non-transactional` blocks.
` like so if you want them to be invisible to readers. ## Acknowledgements ## Inspired by [rustdoc](https://doc.rust-lang.org/rustdoc/what-is-rustdoc.html) and [rust-skeptic](https://github.com/budziq/rust-skeptic). ================================================ FILE: tools/sql-doctester/src/main.rs ================================================ use std::{ collections::HashMap, ffi::OsStr, fs, io::{self, Write}, process::exit, }; use colored::Colorize; use clap::{Arg, Command}; use runner::ConnectionConfig; mod parser; mod runner; fn main() { let matches = Command::new("sql-doctester") .about("Runs sql commands from docs/ dir to test out toolkit") .arg_required_else_help(true) .arg(Arg::new("HOST").short('h').long("host").takes_value(true)) .arg(Arg::new("PORT").short('p').long("port").takes_value(true)) .arg(Arg::new("USER").short('u').long("user").takes_value(true)) .arg( Arg::new("PASSWORD") .short('a') .long("password") .takes_value(true), ) .arg(Arg::new("DB").short('d').long("database").takes_value(true)) .arg(Arg::new("INPUT").takes_value(true)) .mut_arg("help", |_h| Arg::new("help").long("help")) .get_matches(); let dirname = matches.value_of("INPUT").expect("need input"); let connection_config = ConnectionConfig { host: matches.value_of("HOST"), port: matches.value_of("PORT"), user: matches.value_of("USER"), password: matches.value_of("PASSWORD"), database: matches.value_of("DB"), }; let startup_script = include_str!("startup.sql"); let all_tests = extract_tests(dirname); let mut num_errors = 0; let stdout = io::stdout(); let mut out = stdout.lock(); let on_error = |test: Test, error: runner::TestError| { if num_errors == 0 { let _ = writeln!(&mut out, "{}\n", "Tests Failed".bold().red()); } num_errors += 1; let _ = writeln!( &mut out, "{} {}\n", test.location.bold().blue(), test.header.bold().dimmed() ); let _ = writeln!(&mut out, "{}", error.annotate_position(&test.text)); let _ = writeln!(&mut out, "{error}\n"); }; 
runner::run_tests(connection_config, startup_script, all_tests, on_error);

    if num_errors > 0 {
        exit(1)
    }
    let _ = writeln!(&mut out, "{}\n", "Tests Passed".bold().green());
}

/// All runnable tests extracted from one markdown file.
#[derive(Debug, PartialEq, Eq)]
#[must_use]
pub struct TestFile {
    name: String,
    // true iff every test in the file is transactional, so the file can share
    // a database with other stateless files
    stateless: bool,
    tests: Vec<Test>,
}

/// A single SQL block plus its expected output.
#[derive(Debug, PartialEq, Eq)]
#[must_use]
pub struct Test {
    location: String,
    header: String,
    text: String,
    // expected rows; each row is a list of column values
    output: Vec<Vec<String>>,
    transactional: bool,
    ignore_output: bool,
    // column index -> max number of bytes to compare for that column
    precision_limits: HashMap<usize, usize>,
}

/// Walk `root` recursively (following symlinks) and parse tests out of every
/// markdown (`.md`) file; files containing no tests are omitted.
fn extract_tests(root: &str) -> Vec<TestFile> {
    // TODO handle when root is a file
    let mut all_tests = vec![];
    let walker = walkdir::WalkDir::new(root)
        .follow_links(true)
        .sort_by(|a, b| a.path().cmp(b.path()));
    for entry in walker {
        let entry = entry.unwrap();
        if !entry.file_type().is_file() {
            continue;
        }
        if entry.path().extension() != Some(OsStr::new("md")) {
            continue;
        }
        // resolve symlinks so we read the real file's contents, but keep the
        // link's path for test-location reporting below
        let realpath;
        let path = if entry.file_type().is_symlink() {
            realpath = fs::read_link(entry.path()).unwrap();
            &*realpath
        } else {
            entry.path()
        };
        let contents = fs::read_to_string(path).unwrap();
        let tests = parser::extract_tests_from_string(&contents, &entry.path().to_string_lossy());
        if !tests.tests.is_empty() {
            all_tests.push(tests)
        }
    }
    all_tests
}


================================================
FILE: tools/sql-doctester/src/parser.rs
================================================
use std::collections::HashMap;

use pulldown_cmark::{
    CodeBlockKind::Fenced,
    CowStr, Event, Parser,
    Tag::{CodeBlock, Heading},
};

use crate::{Test, TestFile};

// parses the grammar `(heading* (test output?)*)*`
pub fn extract_tests_from_string(s: &str, file_stem: &str) -> TestFile {
    let mut parser = Parser::new(s).into_offset_iter().peekable();
    let mut heading_stack = vec![];
    let mut tests = vec![];
    let mut last_test_seen_at = 0;
    let mut lines_seen = 0;
    let mut stateless = true;

    // consume the parser until an `$end` tag is reached, performing an action on each text
    macro_rules!
consume_text_until { ($parser: ident yields $end: pat => $action: expr) => { for (event, _) in &mut parser { match event { Event::Text(text) => $action(text), $end => break, _ => (), } } }; } 'block_hunt: while let Some((event, span)) = parser.next() { match event { // we found a heading, add it to the stack Event::Start(Heading(level)) => { heading_stack.truncate(level as usize - 1); let mut header = "`".to_string(); consume_text_until!(parser yields Event::End(Heading(..)) => |text: CowStr| header.push_str(&text) ); header.truncate(header.trim_end().len()); header.push('`'); heading_stack.push(header); } // we found a code block, if it's a test add the test Event::Start(CodeBlock(Fenced(ref info))) => { let code_block_info = parse_code_block_info(info); // non-test code block, consume it and continue looking if let BlockKind::Other = code_block_info.kind { for (event, _) in &mut parser { if let Event::End(CodeBlock(Fenced(..))) = event { break; } } continue 'block_hunt; } let current_line = { let offset = span.start; lines_seen += bytecount::count(&s.as_bytes()[last_test_seen_at..offset], b'\n'); last_test_seen_at = offset; lines_seen + 1 }; if let BlockKind::Output = code_block_info.kind { panic!( "found output with no test test.\n{file_stem}:{current_line} {heading_stack:?}" ) } assert!(matches!(code_block_info.kind, BlockKind::Sql)); stateless &= code_block_info.transactional; let mut test = Test { location: format!("{file_stem}:{current_line}"), header: if heading_stack.is_empty() { "".to_string() } else { heading_stack.join("::") }, text: String::new(), output: Vec::new(), transactional: code_block_info.transactional, ignore_output: code_block_info.ignore_output, precision_limits: code_block_info.precision_limits, }; // consume the lines of the test consume_text_until!(parser yields Event::End(CodeBlock(Fenced(..))) => |text: CowStr| test.text.push_str(&text) ); // search to see if we have output loop { match parser.peek() { // we found a code block, is it 
output? Some((Event::Start(CodeBlock(Fenced(info))), _)) => { let code_block_info = parse_code_block_info(info); match code_block_info.kind { // non-output, continue at the top BlockKind::Sql | BlockKind::Other => { tests.push(test); continue 'block_hunt; } // output, consume it BlockKind::Output => { if !test.precision_limits.is_empty() && !code_block_info.precision_limits.is_empty() { panic!( "cannot have precision limits on both test and output.\n{file_stem}:{current_line} {heading_stack:?}" ) } test.precision_limits = code_block_info.precision_limits; let _ = parser.next(); break; } } } // test must be over, continue at the top Some((Event::Start(CodeBlock(..)), _)) | Some((Event::Start(Heading(..)), _)) => { tests.push(test); continue 'block_hunt; } // EOF, we're done None => { tests.push(test); break 'block_hunt; } // for now we allow text between the test and it's output // TODO should/can we forbid this? _ => { let _ = parser.next(); } }; } // consume the output consume_text_until!(parser yields Event::End(CodeBlock(Fenced(..))) => |text: CowStr| { let rows = text.split('\n').skip(2).filter(|s| !s.is_empty()).map(|s| s.split('|').map(|s| s.trim().to_string()).collect::>() ); test.output.extend(rows); } ); tests.push(test); } _ => (), } } TestFile { name: file_stem.to_string(), stateless, tests, } } struct CodeBlockInfo { kind: BlockKind, transactional: bool, ignore_output: bool, precision_limits: HashMap, } #[derive(Clone, Copy)] enum BlockKind { Sql, Output, Other, } fn parse_code_block_info(info: &str) -> CodeBlockInfo { let tokens = info.split(','); let mut info = CodeBlockInfo { kind: BlockKind::Other, transactional: true, ignore_output: false, precision_limits: HashMap::new(), }; for token in tokens { match token.trim() { "ignore" => { if let BlockKind::Sql = info.kind { info.kind = BlockKind::Other; } } "non-transactional" => info.transactional = false, "ignore-output" => info.ignore_output = true, "output" => info.kind = BlockKind::Output, s if 
s.eq_ignore_ascii_case("sql") => info.kind = BlockKind::Sql, p if p.starts_with("precision") => { // syntax `precision(col: bytes)` let precision_err = || -> ! { panic!("invalid syntax for `precision(col: bytes)` found `{p}`") }; let arg = &p["precision".len()..]; if arg.as_bytes().first() != Some(&b'(') || arg.as_bytes().last() != Some(&b')') { precision_err() } let arg = &arg[1..arg.len() - 1]; let args: Vec<_> = arg.split(':').collect(); if args.len() != 2 { precision_err() } let column = args[0].trim().parse().unwrap_or_else(|_| precision_err()); let length = args[1].trim().parse().unwrap_or_else(|_| precision_err()); let old = info.precision_limits.insert(column, length); if old.is_some() { panic!("duplicate precision for column {column}") } } _ => {} } } info } #[cfg(test)] mod test { use std::collections::HashMap; #[test] fn extract() { use super::{Test, TestFile}; let file = r##" # Test Parsing ```SQL select * from foo ``` ```output ``` ```SQL select * from multiline ``` ```output ?column? 
---------- value ``` ## ignored ```SQL,ignore select * from foo ``` ## non-transactional ```SQL,non-transactional select * from bar ``` ```output, precision(1: 3) a | b ---+--- 1 | 2 ``` ## no output ```SQL,ignore-output select * from baz ``` ## end by header ```SQL select * from quz ``` ## end by file ```SQL select * from qat ``` "##; let tests = super::extract_tests_from_string(file, "/test/file.md"); let expected = TestFile { name: "/test/file.md".to_string(), stateless: false, tests: vec![ Test { location: "/test/file.md:3".to_string(), header: "`Test Parsing`".to_string(), text: "select * from foo\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), }, Test { location: "/test/file.md:9".to_string(), header: "`Test Parsing`".to_string(), text: "select * from multiline\n".to_string(), output: vec![vec!["value".to_string()]], transactional: true, ignore_output: false, precision_limits: HashMap::new(), }, Test { location: "/test/file.md:24".to_string(), header: "`Test Parsing`::`non-transactional`".to_string(), text: "select * from bar\n".to_string(), output: vec![vec!["1".to_string(), "2".to_string()]], transactional: false, ignore_output: false, precision_limits: [(1, 3)].iter().cloned().collect(), }, Test { location: "/test/file.md:34".to_string(), header: "`Test Parsing`::`no output`".to_string(), text: "select * from baz\n".to_string(), output: vec![], transactional: true, ignore_output: true, precision_limits: HashMap::new(), }, Test { location: "/test/file.md:39".to_string(), header: "`Test Parsing`::`end by header`".to_string(), text: "select * from quz\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), }, Test { location: "/test/file.md:44".to_string(), header: "`Test Parsing`::`end by file`".to_string(), text: "select * from qat\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), }, ], 
}; assert!( tests == expected, "left: {:#?}\n right: {:#?}", tests, expected ); } } ================================================ FILE: tools/sql-doctester/src/runner.rs ================================================ use rayon::{iter::ParallelIterator, prelude::*}; use std::{borrow::Cow, error::Error, fmt}; use colored::Colorize; use postgres::{error::DbError, Client, NoTls, SimpleQueryMessage}; use uuid::Uuid; use crate::{Test, TestFile}; #[derive(Copy, Clone)] pub struct ConnectionConfig<'s> { pub host: Option<&'s str>, pub port: Option<&'s str>, pub user: Option<&'s str>, pub password: Option<&'s str>, pub database: Option<&'s str>, } impl<'s> ConnectionConfig<'s> { fn config_string(&self) -> Cow<'s, str> { use std::fmt::Write; let ConnectionConfig { host, port, user, password, database, } = self; let mut config = String::new(); if let Some(host) = host { let _ = write!(&mut config, "host={host} "); } if let Some(port) = port { let _ = write!(&mut config, "port={port} "); } let _ = match user { Some(user) => write!(&mut config, "user={user} "), None => write!(&mut config, "user=postgres "), }; if let Some(password) = password { let _ = write!(&mut config, "password={password} "); } if let Some(database) = database { let _ = write!(&mut config, "dbname={database} "); } Cow::Owned(config) } } pub fn run_tests( connection_config: ConnectionConfig<'_>, startup_script: &str, all_tests: Vec, mut on_error: OnErr, ) { let root_connection_config = connection_config.config_string(); let root_connection_config = &*root_connection_config; eprintln!("running {} test files", all_tests.len()); let start_db = |tests_name: &str| { let db_name = format!("doctest_db__{}", Uuid::new_v4()); let finish_name = tests_name.to_string(); let drop_name = db_name.to_string(); let deferred = Deferred(move || { eprintln!("{} {}", "Finished".bold().green(), finish_name); let _ = Client::connect(root_connection_config, NoTls) .and_then(|mut client| { client.simple_query(&format!(r#"DROP 
DATABASE IF EXISTS "{drop_name}""#)) }) .map_err(|e| eprintln!("error dropping DB {e}")); }); { eprintln!("{} {}", "Starting".bold().green(), tests_name); let mut root_client = Client::connect(root_connection_config, NoTls) .expect("could not connect to postgres"); root_client .simple_query(&format!(r#"CREATE DATABASE "{db_name}""#)) .expect("could not create test DB"); } (db_name, deferred) }; let (stateless_db, _dropper) = match all_tests.iter().any(|t| t.stateless) { false => (None, None), true => { let (name, dropper) = start_db("stateless tests"); (Some(name), Some(dropper)) } }; if let Some(db) = stateless_db.as_ref() { let stateless_connection_config = ConnectionConfig { database: Some(db), ..connection_config }; let mut client = Client::connect(&stateless_connection_config.config_string(), NoTls) .expect("could not connect to test DB"); let _ = client .simple_query(startup_script) .expect("could not run init script"); } let stateless_db = stateless_db.as_ref(); let errors: Vec<_> = all_tests .into_par_iter() .flat_map_iter(|tests| { let (db_name, deferred) = match tests.stateless { true => { eprintln!("{} {}", "Running".bold().green(), tests.name); (stateless_db.map(|s| Cow::Borrowed(&**s)), None) } false => { let (db_name, deferred) = start_db(&tests.name); (Some(Cow::Owned(db_name)), Some(deferred)) } }; let test_connection_config = ConnectionConfig { database: db_name.as_deref(), ..connection_config }; let mut client = Client::connect(&test_connection_config.config_string(), NoTls) .expect("could not connect to test DB"); if !tests.stateless { let _ = client .simple_query(startup_script) .expect("could not run init script"); } let deferred = deferred; tests.tests.into_iter().map(move |test| { let output = if test.transactional { run_transactional_test(&mut client, &test) } else { run_nontransactional_test(&mut client, &test) }; // ensure that the DB is dropped after the client let _deferred = &deferred; (test, output) }) }) .collect(); drop(_dropper); 
for (test, error) in errors { match error { Ok(..) => continue, Err(error) => on_error(test, error), } } } fn run_transactional_test(client: &mut Client, test: &Test) -> Result<(), TestError> { let mut txn = client.transaction()?; let output = txn.simple_query(&test.text)?; let res = validate_output(output, test); txn.rollback()?; res } fn run_nontransactional_test(client: &mut Client, test: &Test) -> Result<(), TestError> { let output = client.simple_query(&test.text)?; validate_output(output, test) } fn validate_output(output: Vec, test: &Test) -> Result<(), TestError> { use SimpleQueryMessage::*; if test.ignore_output { return Ok(()); } let mut rows = Vec::with_capacity(test.output.len()); for r in output { match r { RowDescription(_r) => continue, Row(r) => { let mut row: Vec = Vec::with_capacity(r.len()); for i in 0..r.len() { row.push(r.get(i).unwrap_or("").to_string()) } rows.push(row); } CommandComplete(..) => break, _ => { eprintln!("unhandled message: {r:?} for test: {test:?}"); unreachable!() } } } let output_error = |header: &str| { format!( "{}\n{expected}\n{}{}\n\n{received}\n{}{}\n\n{delta}\n{}", header, stringify_table(&test.output), format!("({} rows)", test.output.len()).dimmed(), stringify_table(&rows), format!("({} rows)", rows.len()).dimmed(), stringify_delta(&test.output, &rows), expected = "Expected".bold().blue(), received = "Received".bold().blue(), delta = "Delta".bold().blue(), ) }; if test.output.len() != rows.len() { return Err(TestError::OutputError(output_error( "output has a different number of rows than expected.", ))); } fn clamp_len<'s>(mut col: &'s str, idx: usize, test: &Test) -> &'s str { let max_len = test.precision_limits.get(&idx); if let Some(&max_len) = max_len { if col.len() > max_len { col = &col[..max_len] } } col } let all_eq = test.output.iter().zip(rows.iter()).all(|(out, row)| { out.len() == row.len() && out .iter() .zip(row.iter()) .enumerate() .all(|(i, (o, r))| clamp_len(o, i, test) == clamp_len(r, i, test)) }); 
if !all_eq { return Err(TestError::OutputError(output_error( "output has a different values than expected.", ))); } Ok(()) } fn stringify_table(table: &[Vec]) -> String { use std::{cmp::max, fmt::Write}; if table.is_empty() { return "---".to_string(); } let mut width = vec![0; table[0].len()]; for row in table { // Ensure that we have width for every column // TODO this shouldn't be needed, but sometimes is? if width.len() < row.len() { width.extend((0..row.len() - width.len()).map(|_| 0)); } for (i, value) in row.iter().enumerate() { width[i] = max(width[i], value.len()) } } let mut output = String::with_capacity(width.iter().sum::() + width.len() * 3); for row in table { for (i, value) in row.iter().enumerate() { if i != 0 { output.push_str(" | ") } let _ = write!(&mut output, "{:>width$}", value, width = width[i]); } output.push('\n') } output } #[allow(clippy::needless_range_loop)] fn stringify_delta(left: &[Vec], right: &[Vec]) -> String { use std::{cmp::max, fmt::Write}; static EMPTY_ROW: Vec = vec![]; static EMPTY_VAL: String = String::new(); let mut width = vec![ 0; max( left.first().map(Vec::len).unwrap_or(0), right.first().map(Vec::len).unwrap_or(0) ) ]; let num_rows = max(left.len(), right.len()); for i in 0..num_rows { let left = left.get(i).unwrap_or(&EMPTY_ROW); let right = right.get(i).unwrap_or(&EMPTY_ROW); let cols = max(left.len(), right.len()); for j in 0..cols { let left = left.get(j).unwrap_or(&EMPTY_VAL); let right = right.get(j).unwrap_or(&EMPTY_VAL); if left == right { width[j] = max(width[j], left.len()) } else { width[j] = max(width[j], left.len() + right.len() + 2) } } } let mut output = String::with_capacity(width.iter().sum::() + width.len() * 3); for i in 0..num_rows { let left = left.get(i).unwrap_or(&EMPTY_ROW); let right = right.get(i).unwrap_or(&EMPTY_ROW); let cols = max(left.len(), right.len()); for j in 0..cols { let left = left.get(j).unwrap_or(&EMPTY_VAL); let right = right.get(j).unwrap_or(&EMPTY_VAL); if j != 0 { let _ = 
write!(&mut output, " | "); } let (value, padding) = if left == right { (left.to_string(), width[j] - left.len()) } else { let padding = width[j] - (left.len() + right.len() + 2); let value = format!( "{}{}{}{}", "-".magenta(), left.magenta(), "+".yellow(), right.yellow() ); (value, padding) }; // trick to ensure correct padding, the color characters are counted // if done the normal way. let _ = write!(&mut output, "{:>padding$}{}", "", value, padding = padding); } let _ = writeln!(&mut output); } output } pub enum TestError { PgError(postgres::Error), OutputError(String), } impl fmt::Display for TestError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { TestError::PgError(error) => { match error.source().and_then(|e| e.downcast_ref::()) { Some(e) => { use postgres::error::ErrorPosition::*; let pos = match e.position() { Some(Original(pos)) => format!("At character {pos}"), Some(Internal { position, query }) => { format!("In internal query `{query}` at {position}") } None => String::new(), }; write!( f, "{}\n{}\n{}\n{}", "Postgres Error:".bold().red(), e, e.detail().unwrap_or(""), pos, ) } None => write!(f, "{error}"), } } TestError::OutputError(err) => write!(f, "{} {err}", "Error:".bold().red()), } } } impl From for TestError { fn from(error: postgres::Error) -> Self { TestError::PgError(error) } } impl TestError { pub fn annotate_position<'s>(&self, sql: &'s str) -> Cow<'s, str> { match self.location() { None => sql.into(), Some(pos) => format!( "{}{}{}", &sql[..pos as usize], "~>".bright_red(), &sql[pos as usize..], ) .into(), } } fn location(&self) -> Option { use postgres::error::ErrorPosition::*; match self { TestError::OutputError(..) => None, TestError::PgError(e) => match e .source() .and_then(|e| e.downcast_ref::().and_then(DbError::position)) { None => None, Some(Internal { .. 
}) => None, Some(Original(pos)) => Some(pos.saturating_sub(1)), }, } } } struct Deferred(T); impl Drop for Deferred { fn drop(&mut self) { self.0() } } ================================================ FILE: tools/sql-doctester/src/startup.sql ================================================ CREATE EXTENSION timescaledb; CREATE EXTENSION timescaledb_toolkit; SET SESSION TIMEZONE TO 'UTC'; -- utility for generating random numbers CREATE SEQUENCE rand START 567; CREATE FUNCTION test_random() RETURNS float AS 'SELECT ((nextval(''rand'')*34567)%1000)::float/1000' LANGUAGE SQL; ================================================ FILE: tools/testbin ================================================ #!/bin/sh # This script automates binary upgrade testing. # Sample run: # OS_NAME=ubuntu OS_VERSION=24.04 tools/testbin -version 1.11.0 -bindir .. -pgversions '13 14' deb # A released toolkit lists the versions it is upgradeable from in # extension/timescaledb_toolkit.control . This script processes those entries # and for each version from which upgrading is supported: # 1. Install old binaries (deb or rpm) for each supported postgresql release # 2. Run the 1st half of the upgrade tests # 3. Install the binary for the version under test # 4. Run the 2nd half of the upgrade tests # The distinction between environment variables and command-line options is # possibly inconsistent. The approach now is for general parameters to come # from the command line, and system-specific parameters to come from the # environment. Specifically, these are required in the environment for deb # packages only: # - OS_NAME # - OS_VERSION set -ex # Minimum version we support arm64 deb - could possibly go lower. # I know 1.8 at least builds on arm. MIN_DEB_ARM=1.10.1 # We added 1: epoch at 1.7.0. MIN_DEB_EPOCH=1.7.0 # TODO Unfortunate that pgrx allows neither specifying nor querying the port it # starts postgres on, so we duplicate that knowledge. Watch out for # that changing! 
PGRX_PORT_BASE=28800 CONTROL=extension/timescaledb_toolkit.control # For PG_VERSIONS default. . tools/dependencies.sh print() { printf '%s\n' "$*" } die() { st=${?:-0} if [ $st -eq 0 ]; then st=2 fi print "$*" >&2 exit $st } usage() { die 'testbin [-n] -bindir DIR -version VERSION -pgversions "[V1] [V2]..." ( ci | deb | rpm )' } # Requires: # - PGRX_PORT_BASE # - PG_VERSION # Sets: # - PG_PORT select_pg() { PG_PORT=$(( $PGRX_PORT_BASE + $PG_VERSION )) } # Start postgres and run the first half (old toolkit) of the test. # Must select_pg first. start_test() { $nop cargo pgrx start --package timescaledb_toolkit pg$PG_VERSION $nop cargo run --manifest-path tools/update-tester/Cargo.toml -- create-test-objects -u $LOGNAME -h 127.1 -p $PG_PORT } # Run the second half (new toolkit) of the test and stop postgres. # Must select_pg first. finish_test() { $nop cargo run --manifest-path tools/update-tester/Cargo.toml -- validate-test-objects -u $LOGNAME -h 127.1 -p $PG_PORT $nop cargo pgrx stop --package timescaledb_toolkit pg$PG_VERSION } deb_init() { [ -n "$OS_NAME" ] || die 'OS_NAME environment variable must be set to the distribution name e.g. debian or ubuntu' [ -n "$OS_VERSION" ] || die 'OS_VERSION environment variable must be set to the distribution version number' ARCH=`dpkg --print-architecture` EPOCH= MIN_DEB_ARM=`cmp_version $MIN_DEB_ARM` MIN_DEB_EPOCH=`cmp_version $MIN_DEB_EPOCH` } # Requires: # - FROM_VERSION skip_from_version() { # We released 1.10.0-dev by accident. We have to support upgrades # from it (and we tested that at the time), but we pulled the binaries, so # we can't test it here. 
[ $FROM_VERSION = 1.10.0-dev ] && return [ $OS_NAME = debian ] && [ $OS_VERSION = 10 ] && [ `cmp_version $FROM_VERSION` -lt 011100 ] && return [ $OS_NAME = ubuntu ] && [ $OS_VERSION = 22.04 ] && [ `cmp_version $FROM_VERSION` -lt 010600 ] && return } # Requires: # - FROM_VERSION # - PG_VERSION skip_from_version_pg_version() { # skip versions without binaries for this PostgreSQL version [ $PG_VERSION -gt 14 ] && [ `cmp_version $FROM_VERSION` -lt 011301 ] && return [ $PG_VERSION -gt 15 ] && [ `cmp_version $FROM_VERSION` -lt 011801 ] && return } # Requires: # - FROM_VERSION deb_start_test() { skip_from_version && return 1 cmp_version=`cmp_version $FROM_VERSION` [ "$ARCH" = arm64 ] && [ $cmp_version -lt $MIN_DEB_ARM ] && return 1 [ $cmp_version -ge $MIN_DEB_EPOCH ] && EPOCH=1: for PG_VERSION in $PG_VERSIONS; do skip_from_version_pg_version && continue select_pg $PG_VERSION deb=timescaledb-toolkit-postgresql-${PG_VERSION}=${EPOCH}${FROM_VERSION}~${OS_NAME}${OS_VERSION} $nop sudo apt-get -qq install $deb || die start_test || die done } test_deb() { deb_init for FROM_VERSION; do deb_start_test || continue for PG_VERSION in $PG_VERSIONS; do skip_from_version_pg_version && continue select_pg $PG_VERSION deb=timescaledb-toolkit-postgresql-${PG_VERSION}_${TOOLKIT_VERSION}~${OS_NAME}${OS_VERSION}_${ARCH}.deb $nop sudo dpkg -i "$BINDIR/$deb" finish_test $nop sudo dpkg -P timescaledb-toolkit-postgresql-$PG_VERSION done done } test_ci() { deb_init # When run under CI after a recent release, the Packages file in the # container image don't know about the latest version. $nop sudo apt-get update for FROM_VERSION; do deb_start_test || continue for PG_VERSION in $PG_VERSIONS; do skip_from_version_pg_version && continue select_pg $PG_VERSION $nop sudo dpkg -P timescaledb-toolkit-postgresql-$PG_VERSION # Installing (and possibly uninstalling) toolkit binary gives this back to root but we need to write to it. 
$nop sudo chown $LOGNAME /usr/lib/postgresql/$PG_VERSION/lib /usr/share/postgresql/$PG_VERSION/extension $nop tools/build -pg$PG_VERSION install finish_test done done } rpm_start_test() { for PG_VERSION in $PG_VERSIONS; do skip_from_version_pg_version && continue select_pg $PG_VERSION rpm=timescaledb-toolkit-postgresql-$PG_VERSION # yum doesn't seem to allow force-install of a specific version. # If the package is already installed at a different version, # the install command below does nothing. # So, uninstall if installed. $nop rpm -q $rpm > /dev/null && $nop sudo rpm -e $rpm $nop sudo yum -q -y install $rpm-$FROM_VERSION start_test done } test_rpm() { ARCH=`rpm -E '%{_arch}'` for FROM_VERSION; do skip_from_version && continue rpm_start_test for PG_VERSION in $PG_VERSIONS; do skip_from_version_pg_version && continue select_pg $PG_VERSION rpm=timescaledb-toolkit-postgresql-$PG_VERSION-$TOOLKIT_VERSION-0.el$OS_VERSION.$ARCH.rpm $nop sudo rpm -U "$BINDIR/$rpm" finish_test $nop sudo rpm -e timescaledb-toolkit-postgresql-$PG_VERSION done done } test_rpm_ci() { for FROM_VERSION; do skip_from_version && continue rpm_start_test for PG_VERSION in $PG_VERSIONS; do skip_from_version_pg_version && continue select_pg $PG_VERSION $nop sudo rpm -e timescaledb-toolkit-postgresql-$PG_VERSION $nop sudo chown -R $LOGNAME /usr/pgsql-$PG_VERSION/lib /usr/pgsql-$PG_VERSION/share/extension $nop tools/build -pg$PG_VERSION install finish_test done done } # Format 3-part version string for numeric comparison. # If this script has survived to see one of the 3 parts incremented past 99: # congratulations! It is not hard to fix. cmp_version() { minpat=${1#*.} printf '%02d%02d%02d' ${1%%.*} ${minpat%.*} ${minpat#*.} 2> /dev/null } print_upgradeable_from() { # TODO We never shipped a 1.4 deb and the 1.5 deb is called 1.5.0 # Let's draw the line there and remove those from upgradeable_from. # Someone who needs to upgrade from 1.4 or 1.5 can upgrade to 1.10.1 and then beyond. 
sed -n "s/'//g; s/,//g; s/^# upgradeable_from = 1\.4 1\.5 //p" $CONTROL } cleanup() { set +e for PG_VERSION in $PG_VERSIONS; do select_pg $PG_VERSION $nop cargo pgrx stop --package timescaledb_toolkit pg$PG_VERSION done } run() { [ -n "$LOGNAME" ] || die 'LOGNAME environment variable must be set to the login name' [ -n "$PG_VERSIONS" ] || die '-pgversions required' # TODO Requiring -bindir and -version when not all methods need them is awkward but eh. [ -d "$BINDIR" ] || die '-bindir required' [ -n "$TOOLKIT_VERSION" ] || die '-version required' trap cleanup 0 test_$1 `print_upgradeable_from` trap - 0 echo DONE } while [ $# -gt 0 ]; do arg="$1" shift case "$arg" in -n) nop=: ;; -bindir) BINDIR=$1 shift ;; -pgversions) PG_VERSIONS=$1 shift ;; -version) TOOLKIT_VERSION=$1 shift ;; ci|deb|rpm|rpm_ci) run $arg ;; *) usage ;; esac done ================================================ FILE: tools/update-tester/Cargo.toml ================================================ [package] name = "update-tester" version = "0.3.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] control_file_reader = {path = "../../crates/scripting-utilities/control_file_reader"} postgres_connection_configuration = {path = "../../crates/scripting-utilities/postgres_connection_configuration"} colored = "2.0.0" clap = { version = "3.2.15", features = ["wrap_help"] } postgres = "0.19.1" semver = "1.0.9" toml_edit = "0.14.3" xshell = "0.1.17" pulldown-cmark = "0.8.0" walkdir = "2.3.2" bytecount = "0.6.3" ================================================ FILE: tools/update-tester/Readme.md ================================================ # Update Tester # Runs update tests. It'll install every version of the extension marked as `upgradeable_from` in `timescaledb_toolkit.control` and test that updates to the current version work correctly. At a high level: 1. For each version in `upgradeable_from` 1. 
Checkout the corresponding tag in git, 2. Build and install the extension at that tag, 3. Set git back to the original state. 2. Build and install the extension at the original git state. 3. For each version in `upgradeable_from` 1. create a database, 2. install the old version of the extension, 3. install some `timescaledb_toolkit` objects, 4. update the extension, 5. validate the extension is in the expected state. **NOTE:** Running this _will_ move git's `HEAD`. Though git will warn on conflicts, and we do our best to reset the tree state before the script exits, we recommend only using it on a clean tree. ``` USAGE: update-tester [OPTIONS] FLAGS: --help Prints help information -V, --version Prints version information OPTIONS: -d, --database postgres database the root connection should use. By default this DB will only be used to spawn the individual test databases; no tests will run against it. -h, --host postgres host -a, --password postgres password -p, --port postgres port -u, --user postgres user ARGS: Path in which to find the timescaledb-toolkit repo Path to pg_config for the DB we are using Path to cargo-pgrx (must be 0.4 series or newer) Path to cargo-pgrx 0.2-0.3 series ``` ================================================ FILE: tools/update-tester/src/installer.rs ================================================ #![allow(unexpected_cfgs)] use std::{collections::HashSet, path::Path}; use colored::Colorize; use semver::Version; use toml_edit::Document; use xshell::{cmd, cp, mkdir_p, pushd, read_dir}; use crate::{defer, quietly_run}; fn pgrx_name(version: &Version) -> &'static str { if version >= &Version::new(0, 7, 4) { "pgx" } else { "pgrx" } } #[allow(clippy::too_many_arguments)] pub fn install_all_versions( root_dir: &str, cache_dir: Option<&str>, pg_config: &str, cargo_pgrx: &str, cargo_pgrx_old: &str, current_version: &str, old_versions: &[String], reinstall: &HashSet<&str>, ) -> xshell::Result<()> { let extension_dir = path!(root_dir / 
"extension"); let install_toolkit = |pgrx_version: Version| -> xshell::Result<()> { let _d = pushd(&extension_dir)?; let pgrx_name = pgrx_name(&pgrx_version); match pgrx_version >= Version::new(0, 4, 0) { true => quietly_run(cmd!("{cargo_pgrx} {pgrx_name} install -c {pg_config}")), false => quietly_run(cmd!("{cargo_pgrx_old} {pgrx_name} install -c {pg_config}")), } }; let post_install = || -> xshell::Result<()> { let _d = pushd(root_dir)?; quietly_run(cmd!( "cargo run --manifest-path ./tools/post-install/Cargo.toml -- {pg_config}" )) }; if let Some(cache_dir) = cache_dir { restore_from_cache(cache_dir, pg_config)? } { let base_checkout = get_current_checkout()?; let pgrx_version = get_pgrx_version( &std::fs::read_to_string("extension/Cargo.toml").expect("unable to read Cargo.toml"), ); // Install the versions in reverse-time order. // Since later versions tend to be supersets of old versions, // I expect compilation to be faster this way - Josh for version in old_versions.iter().rev() { let force_reinstall = reinstall.contains(&**version); if !force_reinstall && version_is_installed(pg_config, version)? 
{ eprintln!("{} {}", "Already Installed".blue(), version); continue; } eprintln!("{} {}", "Installing".bold().cyan(), version); let tag_version = tag_version(version); quietly_run(cmd!("git fetch origin tag {tag_version}"))?; quietly_run(cmd!("git checkout tags/{tag_version}"))?; let _d = defer(|| quietly_run(cmd!("git checkout {base_checkout}"))); let pgrx_version = get_pgrx_version( &std::fs::read_to_string("extension/Cargo.toml") .expect("unable to read Cargo.toml"), ); install_toolkit(pgrx_version)?; post_install()?; eprintln!("{} {}", "Finished".bold().green(), version); } if let Some(cache_dir) = cache_dir { save_to_cache(cache_dir, pg_config)?; } eprintln!( "{} {} ({})", "Installing Current".bold().cyan(), current_version, base_checkout ); install_toolkit(pgrx_version)?; } post_install()?; eprintln!("{}", "Finished Current".bold().green()); Ok(()) } fn get_current_checkout() -> xshell::Result { let current_branch = cmd!("git rev-parse --abbrev-ref --symbolic-full-name HEAD").read()?; if current_branch != "HEAD" { return Ok(current_branch); } cmd!("git rev-parse --verify HEAD").read() } fn get_pgrx_version(cargo_toml_contents: &str) -> Version { let cargo = cargo_toml_contents .parse::() .expect("invalid Cargo.toml"); let pgrx_dependency = cargo["dependencies"] .get("pgrx") // check old name if no pgrx found .unwrap_or_else(|| &cargo["dependencies"]["pgx"]); pgrx_dependency .as_str() .expect("expected pgrx to only have a version") .trim_start_matches(['=', '^', '~'].as_slice()) .parse() .expect("cannot parse pgrx version") } // We were unprincipled with some of our old versions, so the version from // the control file is `x.y`, while the tag is `x.y.0`. 
This function translates // from the control file version to the tag version (in a rather hacky way) fn tag_version(version: &str) -> String { if version.matches('.').count() >= 2 { return version.into(); } format!("{version}.0") } //-----------------------// //-- Cache Maintenance --// //-----------------------// fn version_is_installed(pg_config: &str, version: &str) -> xshell::Result { let binary_name = format!("timescaledb_toolkit-{version}.so"); let bin_dir = cmd!("{pg_config} --pkglibdir").read()?; let installed_files = read_dir(bin_dir)?; let installed = installed_files.into_iter().any(|file| { file.file_name() .map(|name| name.to_string_lossy() == binary_name) .unwrap_or(false) }); Ok(installed) } fn restore_from_cache(cache_dir: &str, pg_config: &str) -> xshell::Result<()> { if !path!(cache_dir).exists() { eprintln!("{}", "Cache does not exist".yellow()); return Ok(()); } eprintln!("{} {}", "Restoring from Cache".bold().blue(), cache_dir); let bin_dir = cmd!("{pg_config} --pkglibdir").read()?; let share_dir = cmd!("{pg_config} --sharedir").read()?; let script_dir = path!(share_dir / "extension"); let cached_bin_dir = path!(cache_dir / "bin"); let cached_script_dir = path!(cache_dir / "extension"); cp_dir(cached_bin_dir, bin_dir, |_| true)?; cp_dir(cached_script_dir, script_dir, |_| true) } fn save_to_cache(cache_dir: &str, pg_config: &str) -> xshell::Result<()> { eprintln!("{} {}", "Saving to Cache".blue(), cache_dir); let cached_bin_dir = path!(cache_dir / "bin"); let cached_script_dir = path!(cache_dir / "extension"); if !cached_bin_dir.exists() { mkdir_p(&cached_bin_dir)? } if !cached_script_dir.exists() { mkdir_p(&cached_script_dir)? 
} let bin_dir = cmd!("{pg_config} --pkglibdir").read()?; let share_dir = cmd!("{pg_config} --sharedir").read()?; let script_dir = path!(share_dir / "extension"); let is_toolkit_file = |file: &Path| { file.file_name() .map(|f| f.to_string_lossy().starts_with("timescaledb_toolkit")) .unwrap_or(false) }; cp_dir(bin_dir, cached_bin_dir, is_toolkit_file)?; cp_dir(script_dir, cached_script_dir, is_toolkit_file) } fn cp_dir( src: impl AsRef, dst: impl AsRef, mut filter: impl FnMut(&Path) -> bool, ) -> xshell::Result<()> { let dst = dst.as_ref(); for file in read_dir(src)? { if filter(&file) { cp(file, dst)?; } } Ok(()) } ================================================ FILE: tools/update-tester/src/main.rs ================================================ use std::{ collections::HashSet, io::{self, Write}, path::Path, process, }; use clap::Arg; use clap::Command; use colored::Colorize; use xshell::{read_file, Cmd}; use control_file_reader::get_upgradeable_from; use postgres_connection_configuration::ConnectionConfig; // macro for literate path joins macro_rules! 
path { ($start:ident $(/ $segment: literal)*) => { { let root: &Path = $start.as_ref(); root $(.join($segment))* } }; ($start:ident / $segment: expr) => { { let root: &Path = $start.as_ref(); root.join($segment) } } } mod installer; mod parser; mod testrunner; fn main() { let matches = Command::new("update-tester") .about("Update tester for toolkit releases") .subcommand_required(true) .arg_required_else_help(true) .subcommand( Command::new("full-update-test-source") .long_flag("full-update-test-source") .about("Run update-test, building toolkit from source unless a local cache is supplied") .arg( Arg::new("HOST") .short('h') .long("host") .takes_value(true) ) .arg( Arg::new("PORT") .short('p') .long("port") .takes_value(true) ) .arg( Arg::new("USER") .short('u') .long("user") .takes_value(true) ) .arg( Arg::new("PASSWORD") .short('a') .long("password") .takes_value(true) ) .arg( Arg::new("DB") .short('d') .long("database") .takes_value(true) ) .arg(Arg::new("CACHE").short('c').long("cache").takes_value(true)) .arg(Arg::new("REINSTALL").long("reinstall").takes_value(true)) .arg(Arg::new("PG_CONFIG").takes_value(true)) .arg(Arg::new("CARGO_PGRX").takes_value(true)) .arg(Arg::new("CARGO_PGRX_OLD").takes_value(true)), ) .subcommand( Command::new("create-test-objects") .long_flag("create-test-objects") .about("Creates test objects in a db using the currently installed version of Toolkit") .arg( Arg::new("HOST") .short('h') .long("host") .takes_value(true) ) .arg( Arg::new("PORT") .short('p') .long("port") .takes_value(true) ) .arg( Arg::new("USER") .short('u') .long("user") .takes_value(true) ) .arg( Arg::new("PASSWORD") .short('a') .long("password") .takes_value(true) ) .arg( Arg::new("DB") .short('d') .long("database") .takes_value(true) ) ) .subcommand( Command::new("validate-test-objects") .long_flag("validate-test-objects") .about("Runs a series of checks on the objects created by create-test-objects using the currently installed version of Toolkit") .arg( 
Arg::new("HOST") .short('h') .long("host") .takes_value(true) ) .arg( Arg::new("PORT") .short('p') .long("port") .takes_value(true) ) .arg( Arg::new("USER") .short('u') .long("user") .takes_value(true) ) .arg( Arg::new("PASSWORD") .short('a') .long("password") .takes_value(true) ) .arg( Arg::new("DB") .short('d') .long("database") .takes_value(true) ) ) // Mutates help, removing the short flag (-h) so that it can be used by HOST .mut_arg("help", |_h| { Arg::new("help") .long("help") }) .get_matches(); match matches.subcommand() { Some(("full-update-test-source", full_update_matches)) => { let connection_config = ConnectionConfig { host: full_update_matches.value_of("HOST"), port: full_update_matches.value_of("PORT"), user: full_update_matches.value_of("USER"), password: full_update_matches.value_of("PASSWORD"), database: full_update_matches.value_of("DB"), }; let cache_dir = full_update_matches.value_of("CACHE"); let root_dir = "."; let reinstall = full_update_matches .value_of("REINSTALL") .map(|r| r.split_terminator(',').collect()) .unwrap_or_else(HashSet::new); let pg_config = full_update_matches .value_of("PG_CONFIG") .expect("missing pg_config"); let cargo_pgrx = full_update_matches .value_of("CARGO_PGRX") .expect("missing cargo_pgrx"); let cargo_pgrx_old = full_update_matches .value_of("CARGO_PGRX_OLD") .expect("missing cargo_pgrx_old"); let mut num_errors = 0; let on_error = |test: parser::Test, error: testrunner::TestError| { num_errors += 1; eprintln!( "{} {}\n", test.location.bold().blue(), test.header.bold().dimmed() ); eprintln!("{}", error.annotate_position(&test.text)); eprintln!("{error}\n"); }; let res = try_main( root_dir, cache_dir, &connection_config, pg_config, cargo_pgrx, cargo_pgrx_old, reinstall, on_error, ); if let Err(err) = res { eprintln!("{err}"); process::exit(1); } if num_errors > 0 { eprintln!( "{} {}\n", num_errors.to_string().bold().red(), "Tests Failed".bold().red() ); process::exit(1) } eprintln!("{}\n", "Tests 
Passed".bold().green()); } Some(("create-test-objects", create_test_object_matches)) => { let connection_config = ConnectionConfig { host: create_test_object_matches.value_of("HOST"), port: create_test_object_matches.value_of("PORT"), user: create_test_object_matches.value_of("USER"), password: create_test_object_matches.value_of("PASSWORD"), database: create_test_object_matches.value_of("DB"), }; let mut num_errors = 0; let on_error = |test: parser::Test, error: testrunner::TestError| { num_errors += 1; eprintln!( "{} {}\n", test.location.bold().blue(), test.header.bold().dimmed() ); eprintln!("{}", error.annotate_position(&test.text)); eprintln!("{error}\n"); }; let res = try_create_objects(&connection_config, on_error); if let Err(err) = res { eprintln!("{err}"); process::exit(1); } if num_errors > 0 { eprintln!( "{} {} {}\n", "Object Creation Failed With".bold().red(), num_errors.to_string().bold().red(), "Errors".bold().red() ); process::exit(1) } eprintln!("{}\n", "Objects Created Successfully".bold().green()); } Some(("validate-test-objects", validate_test_object_matches)) => { let connection_config = ConnectionConfig { host: validate_test_object_matches.value_of("HOST"), port: validate_test_object_matches.value_of("PORT"), user: validate_test_object_matches.value_of("USER"), password: validate_test_object_matches.value_of("PASSWORD"), database: validate_test_object_matches.value_of("DB"), }; let mut num_errors = 0; let on_error = |test: parser::Test, error: testrunner::TestError| { num_errors += 1; eprintln!( "{} {}\n", test.location.bold().blue(), test.header.bold().dimmed() ); eprintln!("{}", error.annotate_position(&test.text)); eprintln!("{error}\n"); }; let root_dir = "."; let res = try_validate_objects(&connection_config, root_dir, on_error); if let Err(err) = res { eprintln!("{err}"); process::exit(1); } if num_errors > 0 { eprintln!("{num_errors} {}\n", "Tests Failed".bold().red()); eprintln!("{}\n", "Validation Failed".bold().red()); 
process::exit(1) } eprintln!("{}\n", "Validations Completed Successfully".bold().green()); } _ => unreachable!(), // if all subcommands are defined, anything else is unreachable } } #[allow(clippy::too_many_arguments)] fn try_main( root_dir: &str, cache_dir: Option<&str>, db_conn: &ConnectionConfig<'_>, pg_config: &str, cargo_pgrx: &str, cargo_pgrx_old: &str, reinstall: HashSet<&str>, on_error: OnErr, ) -> xshell::Result<()> { let (current_version, old_versions) = get_version_info(root_dir)?; if old_versions.is_empty() { panic!("no old versions to upgrade from") } println!("{} [{}]", "Testing".green().bold(), old_versions.join(", ")); installer::install_all_versions( root_dir, cache_dir, pg_config, cargo_pgrx, cargo_pgrx_old, ¤t_version, &old_versions, &reinstall, )?; testrunner::run_update_tests(db_conn, current_version, old_versions, on_error) } fn try_create_objects( db_conn: &ConnectionConfig<'_>, on_error: OnErr, ) -> xshell::Result<()> { testrunner::create_test_objects_for_package_testing(db_conn, on_error) } fn try_validate_objects( _conn: &ConnectionConfig<'_>, root_dir: &str, on_error: OnErr, ) -> xshell::Result<()> { let (_current_version, old_versions) = get_version_info(root_dir)?; if old_versions.is_empty() { panic!("no old versions to upgrade from") } testrunner::update_to_and_validate_new_toolkit_version(_conn, on_error) } fn get_version_info(root_dir: &str) -> xshell::Result<(String, Vec)> { let extension_dir = path!(root_dir / "extension"); let control_file = path!(extension_dir / "timescaledb_toolkit.control"); let manifest_file = path!(extension_dir / "Cargo.toml"); let manifest_contents = read_file(manifest_file)?; let control_contents = read_file(control_file)?; let current_version = manifest_contents .parse::() .expect("failed to parse extension/Cargo.toml") .get("package") .expect("failed to find [package] in extension/Cargo.toml") .get("version") .expect("failed to find package.version in extension/Cargo.toml") .as_str() 
.expect("package.version not a string in extension/Cargo.toml") .to_owned(); let upgradable_from = get_upgradeable_from(&control_contents) .unwrap_or_else(|e| panic!("{e} in control file {control_contents}")); Ok((current_version, upgradable_from)) } //-------------// //- Utilities -// //-------------// // run a command, only printing the output on failure fn quietly_run(cmd: Cmd) -> xshell::Result<()> { let display = format!("{cmd}"); let output = cmd.ignore_status().output()?; if !output.status.success() { io::stdout() .write_all(&output.stdout) .expect("cannot write to stdout"); io::stdout() .write_all(&output.stderr) .expect("cannot write to stdout"); panic!( "{} `{display}` exited with a non-zero error code {}", "ERROR".bold().red(), output.status ) } Ok(()) } // run a command on `drop()` fn defer(f: impl FnMut() -> T) -> Deferred T> { Deferred(f) } struct Deferred T>(F); impl Drop for Deferred where F: FnMut() -> T, { fn drop(&mut self) { self.0(); } } ================================================ FILE: tools/update-tester/src/parser.rs ================================================ use std::{collections::HashMap, ffi::OsStr, fs, path::Path}; use pulldown_cmark::{ CodeBlockKind::Fenced, CowStr, Event, Parser, Tag::{CodeBlock, Heading}, }; use semver::Version; #[derive(Debug, PartialEq, Eq)] #[must_use] pub struct TestFile { pub name: String, stateless: bool, pub tests: Vec, } #[derive(Debug, PartialEq, Eq, Clone)] #[must_use] pub struct Test { pub location: String, pub header: String, pub text: String, pub output: Vec>, transactional: bool, ignore_output: bool, pub precision_limits: HashMap, pub creation: bool, pub validation: bool, pub min_toolkit_version: Option, } pub fn extract_tests(root: &str) -> Vec { // TODO handle when root is a file let mut all_tests = vec![]; let walker = walkdir::WalkDir::new(root) .follow_links(true) .sort_by(|a, b| a.path().cmp(b.path())); for entry in walker { let entry = entry.unwrap(); if !entry.file_type().is_file() { 
continue; } if entry.path().extension() != Some(OsStr::new("md")) { continue; } let contents = fs::read_to_string(entry.path()).unwrap(); let tests = extract_tests_from_string(&contents, &entry.path().to_string_lossy()); if !tests.tests.is_empty() { all_tests.push(tests) } } all_tests }
// Parses the grammar `(heading* (test output?)*)*`.
// Walks pulldown-cmark events: headings build a breadcrumb stack, fenced SQL
// blocks become `Test`s, and a fenced `output` block immediately following a
// test becomes that test's expected output.
pub fn extract_tests_from_string(s: &str, file_stem: &str) -> TestFile { let mut parser = Parser::new(s).into_offset_iter().peekable();
// NOTE(review): generic argument lost in extraction — presumably `Vec<String>`.
let mut heading_stack: Vec = vec![]; let mut tests = vec![]; let mut last_test_seen_at = 0; let mut lines_seen = 0; let mut stateless = true;
// consume the parser until an end tag is reached, performing an action on each text
macro_rules! consume_text_until { ($parser: ident yields $end: pat => $action: expr) => { for (event, _) in &mut parser { match event { Event::Text(text) => $action(text), $end => break, _ => (), } } }; } 'block_hunt: while let Some((event, span)) = parser.next() { match event {
// we found a heading, add it to the stack
Event::Start(Heading(level)) => { heading_stack.truncate(level as usize - 1); let mut header = "`".to_string(); consume_text_until!(parser yields Event::End(Heading(..)) => |text: CowStr| header.push_str(&text) ); header.truncate(header.trim_end().len()); header.push('`'); heading_stack.push(header); }
// we found a code block, if it's a test add the test
Event::Start(CodeBlock(Fenced(ref info))) => { let code_block_info = parse_code_block_info(info);
// non-test code block, consume it and continue looking
if let BlockKind::Other = code_block_info.kind { for (event, _) in &mut parser { if let Event::End(CodeBlock(Fenced(..))) = event { break; } } continue 'block_hunt; }
// 1-based source line of this block, computed incrementally by counting
// newlines since the previously seen block
let current_line = { let offset = span.start; lines_seen += bytecount::count(&s.as_bytes()[last_test_seen_at..offset], b'\n'); last_test_seen_at = offset; lines_seen + 1 }; if let BlockKind::Output = code_block_info.kind { panic!( "found output with no test test.\n{file_stem}:{current_line}
{heading_stack:?}" ) } assert!(matches!(code_block_info.kind, BlockKind::Sql));
// a single non-transactional test makes the whole file stateful
stateless &= code_block_info.transactional; let mut test = Test { location: format!("{file_stem}:{current_line}"), header: if heading_stack.is_empty() { "".to_string() } else { heading_stack.join("::") }, text: String::new(), output: Vec::new(), transactional: code_block_info.transactional, ignore_output: code_block_info.ignore_output, precision_limits: code_block_info.precision_limits, min_toolkit_version: code_block_info.min_toolkit_version, creation: code_block_info.creation, validation: code_block_info.validation, };
// consume the lines of the test
consume_text_until!(parser yields Event::End(CodeBlock(Fenced(..))) => |text: CowStr| test.text.push_str(&text) );
// search to see if we have output
loop { match parser.peek() {
// we found a code block, is it output?
Some((Event::Start(CodeBlock(Fenced(info))), _)) => { let code_block_info = parse_code_block_info(info); match code_block_info.kind {
// non-output, continue at the top
BlockKind::Sql | BlockKind::Other => { tests.push(test); continue 'block_hunt; }
// output, consume it
BlockKind::Output => { if !test.precision_limits.is_empty() && !code_block_info.precision_limits.is_empty() { panic!( "cannot have precision limits on both test and output.\n{file_stem}:{current_line} {heading_stack:?}", ) } test.precision_limits = code_block_info.precision_limits; let _ = parser.next(); break; } } }
// test must be over, continue at the top
Some((Event::Start(CodeBlock(..)), _)) | Some((Event::Start(Heading(..)), _)) => { tests.push(test); continue 'block_hunt; }
// EOF, we're done
None => { tests.push(test); break 'block_hunt; }
// for now we allow text between the test and its output
// TODO should/can we forbid this?
_ => { let _ = parser.next(); } }; }
// consume the output; `skip(2)` drops the header row and its `----` separator
consume_text_until!(parser yields Event::End(CodeBlock(Fenced(..))) => |text: CowStr| { let rows = text.split('\n').skip(2).filter(|s| !s.is_empty()).map(|s| s.split('|').map(|s| s.trim().to_string()).collect::>() ); test.output.extend(rows); } ); tests.push(test); } _ => (), } }
// create filename from full path
let file_name = Path::new(&file_stem).file_stem().unwrap().to_str().unwrap(); TestFile { name: file_name.to_string(), stateless, tests, } }
// Flags parsed from a fenced code block's info string.
// NOTE(review): generic arguments lost in extraction — presumably
// `HashMap<usize, usize>` and `Option<Version>`.
struct CodeBlockInfo { kind: BlockKind, transactional: bool, ignore_output: bool, precision_limits: HashMap, min_toolkit_version: Option, creation: bool, validation: bool, } #[derive(Clone, Copy)] enum BlockKind { Sql, Output, Other, }
// Parse an info string such as `SQL,non-transactional,precision(1: 3)` into
// a `CodeBlockInfo`; unknown tokens are silently ignored.
fn parse_code_block_info(info: &str) -> CodeBlockInfo { let tokens = info.split(','); let mut info = CodeBlockInfo { kind: BlockKind::Other, transactional: true, ignore_output: false, precision_limits: HashMap::new(), min_toolkit_version: None, creation: false, validation: false, }; for token in tokens { match token.trim() { "ignore" => { if let BlockKind::Sql = info.kind { info.kind = BlockKind::Other; } } "non-transactional" => info.transactional = false, "ignore-output" => info.ignore_output = true, m if m.starts_with("min-toolkit-version") => {
// TODO Can we assume that version is greater than 1.10.1 since current tests don't have a min version? This means we can skip edge cases of 1.4/1.10.0-dev/etc.
info.min_toolkit_version = Some(Version::parse(token.trim_start_matches("min-toolkit-version=")).unwrap()) }
// not great, shouldn't assume they typed in a valid version. fix later
"creation" => info.creation = true, "validation" => info.validation = true, "output" => info.kind = BlockKind::Output, s if s.eq_ignore_ascii_case("sql") => info.kind = BlockKind::Sql, p if p.starts_with("precision") => {
// syntax `precision(col: bytes)`
let precision_err = || -> !
{ panic!("invalid syntax for `precision(col: bytes)` found `{p}`") }; let arg = &p["precision".len()..]; if arg.as_bytes().first() != Some(&b'(') || arg.as_bytes().last() != Some(&b')') { precision_err() } let arg = &arg[1..arg.len() - 1]; let args: Vec<_> = arg.split(':').collect(); if args.len() != 2 { precision_err() } let column = args[0].trim().parse().unwrap_or_else(|_| precision_err()); let length = args[1].trim().parse().unwrap_or_else(|_| precision_err()); let old = info.precision_limits.insert(column, length); if old.is_some() { panic!("duplicate precision for column {column}") } } _ => {} } } info } #[cfg(test)] mod tests { use std::collections::HashMap; use semver::{BuildMetadata, Prerelease, Version}; #[test] fn extract() { use super::{Test, TestFile};
// NOTE(review): this fixture was a multi-line raw string; its internal
// newlines were collapsed to spaces by extraction.
let file = r##" # Test Parsing ```SQL,creation select * from foo ``` ```output ``` ```SQL,creation select * from multiline ``` ```output ?column? ---------- value ``` ## ignored ```SQL,ignore,creation select * from foo ``` ## non-transactional,creation ```SQL,non-transactional,creation select * from bar ``` ```output, precision(1: 3) a | b ---+--- 1 | 2 ``` ## no output ```SQL,ignore-output,creation select * from baz ``` ## end by header ```SQL,creation select * from quz ``` ## end by file ```SQL,creation select * from qat ``` ## has a min-toolkit-version ```SQL,creation,min-toolkit-version=1.10.1 select * from qat ``` "##;
let tests = super::extract_tests_from_string(file, "/test/file.md"); let expected = TestFile { name: "file".to_string(), stateless: false, tests: vec![
Test { location: "/test/file.md:3".to_string(), header: "`Test Parsing`".to_string(), text: "select * from foo\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), creation: true, min_toolkit_version: None, validation: false, },
Test { location: "/test/file.md:9".to_string(), header: "`Test Parsing`".to_string(), text: "select * from multiline\n".to_string(), output:
vec![vec!["value".to_string()]], transactional: true, ignore_output: false, precision_limits: HashMap::new(), creation: true, min_toolkit_version: None, validation: false, },
Test { location: "/test/file.md:24".to_string(), header: "`Test Parsing`::`non-transactional,creation`".to_string(), text: "select * from bar\n".to_string(), output: vec![vec!["1".to_string(), "2".to_string()]], transactional: false, ignore_output: false, precision_limits: [(1, 3)].iter().cloned().collect(), creation: true, min_toolkit_version: None, validation: false, },
Test { location: "/test/file.md:34".to_string(), header: "`Test Parsing`::`no output`".to_string(), text: "select * from baz\n".to_string(), output: vec![], transactional: true, ignore_output: true, precision_limits: HashMap::new(), creation: true, min_toolkit_version: None, validation: false, },
Test { location: "/test/file.md:39".to_string(), header: "`Test Parsing`::`end by header`".to_string(), text: "select * from quz\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), creation: true, min_toolkit_version: None, validation: false, },
Test { location: "/test/file.md:44".to_string(), header: "`Test Parsing`::`end by file`".to_string(), text: "select * from qat\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), creation: true, min_toolkit_version: None, validation: false, },
Test { location: "/test/file.md:49".to_string(), header: "`Test Parsing`::`has a min-toolkit-version`".to_string(), text: "select * from qat\n".to_string(), output: vec![], transactional: true, ignore_output: false, precision_limits: HashMap::new(), creation: true, min_toolkit_version: Some(Version { major: 1, minor: 10, patch: 1, pre: Prerelease::EMPTY, build: BuildMetadata::EMPTY, }), validation: false, },
], }; assert!( tests == expected, "left: {:#?}\n right: {:#?}", tests, expected ); } } ================================================
FILE: tools/update-tester/src/testrunner/stabilization.rs ================================================
// Re-export the extension's stabilization data; the macros below are invoked
// by the included `stabilization_info.rs` to turn it into flat lists.
pub use stabilization_info::*; #[path = "../../../../extension/src/stabilization_info.rs"] mod stabilization_info;
// Collect every stabilized function signature into a `&[&str]`, discarding
// the per-version grouping.
#[macro_export] macro_rules! functions_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($fn_name: ident ( $( $($fn_type: ident)+ ),* ) ),* $(,)? } )* ) => { pub static $export_symbol: &[&str] = &[ $( $(stringify!( $fn_name( $( $($fn_type)+ ),* ) ),)* )* ]; }; }
// Collect every stabilized type name into a `&[&str]`.
#[macro_export] macro_rules! types_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($type_name: ident),* $(,)? } )* ) => { pub static $export_symbol: &[&str] = &[ $( $(stringify!($type_name),)* )* ]; }; }
// Build a function returning the set of stabilized operator signatures,
// formatted as `name(type,type)`.
// NOTE(review): return-type generic lost in extraction — presumably
// `std::collections::HashSet<String>`.
#[macro_export] macro_rules! operators_stabilized_at { ( $export_symbol: ident $( $version: literal => { $($operator_name: literal ( $( $($fn_type: ident)+ ),* ) ),* $(,)? } )* ) => { #[allow(non_snake_case)] pub fn $export_symbol() -> std::collections::HashSet { static OPERATORS: &[(&str, &[&str])] = &[ $( $( ( $operator_name, &[ $( stringify!($($fn_type)+) ),* ] ), )* )* ]; OPERATORS.iter().map(|(name, types)| { format!("{}({})", name, types.join(",")) }).collect() } }; } ================================================ FILE: tools/update-tester/src/testrunner.rs ================================================
use colored::Colorize; use semver::{BuildMetadata, Prerelease, Version}; use crate::{defer, parser, Deferred}; use postgres::{Client, NoTls, SimpleQueryMessage}; use postgres_connection_configuration::ConnectionConfig; mod stabilization; use crate::parser::Test; use postgres::error::DbError; use std::{borrow::Cow, error::Error, fmt};
// For each old version, create a temporary DB, install the old toolkit,
// create the test objects, then update to the current toolkit and validate.
// Test failures are reported through `on_error`; the DB is dropped afterwards.
// NOTE(review): `Vec` element type and the `<OnErr: …>` generic list were
// lost in extraction — presumably `Vec<String>` and `FnMut(Test, TestError)`.
pub fn run_update_tests( root_config: &ConnectionConfig, current_toolkit_version: String, old_toolkit_versions: Vec, mut on_error: OnErr, ) -> Result<(), xshell::Error> { for old_toolkit_version in old_toolkit_versions { eprintln!( " {} {old_toolkit_version} -> {current_toolkit_version}", "Testing".bold().cyan()
); let test_db_name = format!("tsdb_toolkit_test_{old_toolkit_version}--{current_toolkit_version}"); let test_config = root_config.with_db(&test_db_name); with_temporary_db(&test_db_name, root_config, || { let mut test_client = connect_to(&test_config); let errors = test_client .create_test_objects_from_files(test_config, old_toolkit_version.clone()); for (test, error) in errors { match error { Ok(..) => continue, Err(error) => on_error(test, error), } } let errors = test_client .validate_test_objects_from_files(test_config, old_toolkit_version.clone()); for (test, error) in errors { match error { Ok(..) => continue, Err(error) => on_error(test, error), } } }); eprintln!( "{} {old_toolkit_version} -> {current_toolkit_version}", "Finished".bold().green() ); } Ok(()) }
// Recreate the persistent `tsdb_toolkit_test` DB, install the toolkit at the
// currently packaged version, and create the test objects in it. Unlike
// `run_update_tests`, the DB is deliberately left in place for a later
// `update_to_and_validate_new_toolkit_version` run.
pub fn create_test_objects_for_package_testing( root_config: &ConnectionConfig, mut on_error: OnErr, ) -> Result<(), xshell::Error> { eprintln!(" {}", "Creating test objects".bold().cyan()); let test_db_name = "tsdb_toolkit_test"; let test_config = root_config.with_db(test_db_name); let mut client = connect_to(root_config).0; let drop = format!(r#"DROP DATABASE IF EXISTS "{test_db_name}""#); client .simple_query(&drop) .unwrap_or_else(|e| panic!("could not drop db {test_db_name} due to {e}")); let create = format!(r#"create database "{test_db_name}""#); client .simple_query(&create) .unwrap_or_else(|e| panic!("could not create db {test_db_name} due to {e}")); let mut test_client = connect_to(&test_config); let create = "CREATE EXTENSION timescaledb_toolkit"; test_client .simple_query(create) .unwrap_or_else(|e| panic!("could not install extension due to {e}",)); let current_toolkit_version = test_client.get_installed_extension_version();
// create test objects
let errors = test_client.create_test_objects_from_files(test_config, current_toolkit_version); for (test, error) in errors { match error { Ok(..)
=> continue, Err(error) => on_error(test, error), } } eprintln!("{}", "Finished Object Creation".bold().green()); Ok(()) }
// Open a connection to the configured DB, panicking (with the DB name) on failure.
fn connect_to(config: &ConnectionConfig<'_>) -> TestClient { let client = Client::connect(&config.config_string(), NoTls).unwrap_or_else(|e| { panic!( "could not connect to postgres DB {database} due to {e}", database = config.database.unwrap_or(""), e = e ) }); TestClient(client) }
// Second half of package testing: update the persistent test DB to the newly
// installed toolkit, validate the objects created earlier, then drop the DB.
pub fn update_to_and_validate_new_toolkit_version( root_config: &ConnectionConfig, mut on_error: OnErr, ) -> Result<(), xshell::Error> {
// update extension to new version
let test_db_name = "tsdb_toolkit_test"; let test_config = root_config.with_db(test_db_name); let mut test_client = connect_to(&test_config);
// get the currently installed version before updating
let old_toolkit_version = test_client.get_installed_extension_version(); test_client.update_to_current_toolkit_version();
// run validation tests
let errors = test_client.validate_test_objects_from_files(test_config, old_toolkit_version); for (test, error) in errors { match error { Ok(..)
=> continue, Err(error) => on_error(test, error), } }
// This close needs to happen before trying to drop the DB or else panics with `There is 1 other session using the database.`
test_client .0 .close() .unwrap_or_else(|e| panic!("Could not close connection to postgres DB due to {e}"));
// if the validation passes, drop the db
let mut client = connect_to(root_config).0; eprintln!("{}", "Dropping database.".bold().green()); let drop = format!(r#"DROP DATABASE IF EXISTS "{test_db_name}""#); client .simple_query(&drop) .unwrap_or_else(|e| panic!("could not drop db {test_db_name} due to {e}")); Ok(()) }
//---------------//
//- DB creation -//
//---------------//
// Run `f` with a freshly created DB; the guard drops the DB afterwards.
// NOTE(review): generics lost in extraction — presumably
// `with_temporary_db<T>(db_name: impl AsRef<str>, …)`.
fn with_temporary_db( db_name: impl AsRef, root_config: &ConnectionConfig<'_>, f: impl FnOnce() -> T, ) -> T { let _db_dropper = create_db(root_config, db_name.as_ref()); let res = f(); drop(_db_dropper); res }
// create a database returning an guard that will DROP the db on `drop()`
#[must_use] fn create_db<'a>( root_config: &'a ConnectionConfig<'_>, new_db_name: &'a str, ) -> Deferred<(), impl FnMut() + 'a> { let mut client = connect_to(root_config).0; let create = format!(r#"CREATE DATABASE "{new_db_name}""#); client .simple_query(&create) .unwrap_or_else(|e| panic!("could not create db {new_db_name} due to {e}")); defer(move || { let mut client = connect_to(root_config).0; let drop = format!(r#"DROP DATABASE "{new_db_name}""#); client .simple_query(&drop) .unwrap_or_else(|e| panic!("could not drop db {new_db_name} due to {e}")); }) }
//-------------------//
//- Test Components -//
//-------------------//
// Thin wrapper over `postgres::Client` (Deref/DerefMut below) carrying the
// test helper methods.
// NOTE(review): `QueryValues` generics lost in extraction — presumably
// `Vec<Vec<Option<String>>>`.
struct TestClient(Client); type QueryValues = Vec>>; impl TestClient {
// Install the toolkit extension pinned at the given (old) version.
fn install_toolkit_at_version(&mut self, old_toolkit_version: &str) { let create = format!(r#"CREATE EXTENSION timescaledb_toolkit VERSION "{old_toolkit_version}""#); self.simple_query(&create).unwrap_or_else(|e| { panic!("could not install extension at version {old_toolkit_version} due to {e}",) }); } fn create_test_objects_from_files(
&mut self,
        root_config: ConnectionConfig<'_>,
        current_toolkit_version: String,
    ) -> Vec<(Test, Result<(), TestError>)> {
        // For every test file: recreate a per-file DB, install the toolkit at
        // `current_toolkit_version`, and run the file's `creation` tests in it.
        // Returns each test paired with its result; callers report the `Err`s.
        let all_tests = parser::extract_tests("tests/update");
        // Hack to match previous versions of toolkit that don't conform to Semver.
        let current_toolkit_semver = match current_toolkit_version.as_str() {
            "1.4" => Version {
                major: 1,
                minor: 4,
                patch: 0,
                pre: Prerelease::EMPTY,
                build: BuildMetadata::EMPTY,
            },
            "1.5" => Version {
                major: 1,
                minor: 5,
                patch: 0,
                pre: Prerelease::EMPTY,
                build: BuildMetadata::EMPTY,
            },
            "1.10.0-dev" => Version {
                major: 1,
                minor: 10,
                patch: 0,
                pre: Prerelease::EMPTY,
                build: BuildMetadata::EMPTY,
            },
            x => Version::parse(x).unwrap(),
        };
        let errors: Vec<_> = all_tests
            .into_iter()
            .flat_map(|tests| {
                let mut db_creation_client = connect_to(&root_config);
                let test_db_name = format!("{}_{}", tests.name, current_toolkit_version);
                let drop = format!(r#"DROP DATABASE IF EXISTS "{test_db_name}""#);
                db_creation_client
                    .simple_query(&drop)
                    .unwrap_or_else(|e| panic!("could not drop db {test_db_name} due to {e}"));
                // use a UTF-8 collation when `locale -a` reports one, else plain "C"
                let locale_flags = {
                    match std::process::Command::new("locale").arg("-a").output() {
                        Ok(cmd)
                            if String::from_utf8_lossy(&cmd.stdout)
                                .lines()
                                .any(|l| l == "C.UTF-8") =>
                        {
                            "LC_COLLATE 'C.UTF-8' LC_CTYPE 'C.UTF-8'"
                        }
                        _ => "LC_COLLATE 'C' LC_CTYPE 'C'",
                    }
                };
                let create = format!(r#"CREATE DATABASE "{test_db_name}" {locale_flags}"#,);
                db_creation_client
                    .simple_query(&create)
                    .unwrap_or_else(|e| panic!("could not create db {test_db_name} due to {e}"));
                let test_config = root_config.with_db(&test_db_name);
                let mut test_client = connect_to(&test_config);
                test_client
                    .simple_query(&format!(
                        r#"ALTER DATABASE "{test_db_name}" SET timezone TO 'UTC';"#,
                    ))
                    .unwrap_or_else(|e| panic!("could not set time zone to UTC due to {e}"));
                // install new version and make sure it is correct
                // (was mojibake `¤t_toolkit_version`, i.e. `&current_…`)
                test_client.install_toolkit_at_version(&current_toolkit_version);
                let installed_version = test_client.get_installed_extension_version();
                assert_eq!(
                    installed_version, current_toolkit_version,
                    "installed unexpected version"
                );
                tests
                    .tests
                    .into_iter()
                    .filter(|x| x.creation)
                    // skip tests that need a newer toolkit than the one installed
                    // (was mojibake `¤t_toolkit_semver`, i.e. `&current_…`)
                    .filter(|x| match &x.min_toolkit_version {
                        Some(version) => version <= &current_toolkit_semver,
                        None => true,
                    })
                    .map(move |test| {
                        let output = run_test(&mut test_client, &test);
                        (test, output)
                    })
            })
            .collect();
        errors
    }

    // `ALTER EXTENSION … UPDATE` to the default (current) toolkit version.
    fn update_to_current_toolkit_version(&mut self) {
        let update = "ALTER EXTENSION timescaledb_toolkit UPDATE";
        self.simple_query(update)
            .unwrap_or_else(|e| panic!("could not update extension due to {e}"));
    }

    // Counterpart of `create_test_objects_from_files`: reconnect to each
    // per-file DB (created under `old_toolkit_version`), update the extension,
    // check nothing still references the old binary and that all stabilized
    // objects exist, then run the file's `validation` tests.
    fn validate_test_objects_from_files(
        &mut self,
        root_config: ConnectionConfig<'_>,
        old_toolkit_version: String,
    ) -> Vec<(Test, Result<(), TestError>)> {
        let all_tests = parser::extract_tests("tests/update");
        // Hack to match previous versions of toolkit that don't conform to Semver.
        let old_toolkit_semver = match old_toolkit_version.as_str() {
            "1.4" => Version {
                major: 1,
                minor: 4,
                patch: 0,
                pre: Prerelease::EMPTY,
                build: BuildMetadata::EMPTY,
            },
            "1.5" => Version {
                major: 1,
                minor: 5,
                patch: 0,
                pre: Prerelease::EMPTY,
                build: BuildMetadata::EMPTY,
            },
            "1.10.0-dev" => Version {
                major: 1,
                minor: 10,
                patch: 0,
                pre: Prerelease::EMPTY,
                build: BuildMetadata::EMPTY,
            },
            x => Version::parse(x).unwrap(),
        };
        let errors: Vec<_> = all_tests
            .into_iter()
            .flat_map(|tests| {
                let test_db_name = format!("{}_{}", tests.name, old_toolkit_version);
                let test_config = root_config.with_db(&test_db_name);
                let mut test_client = connect_to(&test_config);
                test_client.update_to_current_toolkit_version();
                let new_toolkit_version = test_client.get_installed_extension_version();
                test_client.check_no_references_to_the_old_binary_leaked(&new_toolkit_version);
                test_client.validate_stable_objects_exist();
                tests
                    .tests
                    .into_iter()
                    .filter(|x| x.validation)
                    .filter(|x| match &x.min_toolkit_version {
                        Some(min_version) => min_version <= &old_toolkit_semver,
                        None => true,
                    })
                    .map(move |test| {
                        let output = run_test(&mut test_client, &test);
                        // ensure that the DB is dropped after the client
                        (test, output)
                    })
            })
            .collect();
        errors
    }

    fn
check_no_references_to_the_old_binary_leaked(&mut self, current_toolkit_version: &str) {
// After an extension update, no pg_proc entry may still point at a toolkit
// shared library other than the current one; panic listing any that do.
let query_get_leaked_objects = format!( "SELECT pg_proc.proname \ FROM pg_catalog.pg_proc \ WHERE pg_proc.probin LIKE '$libdir/timescaledb_toolkit%' \ AND pg_proc.probin <> '$libdir/timescaledb_toolkit-{current_toolkit_version}';", ); let leaks = self .simple_query(&query_get_leaked_objects) .unwrap_or_else(|e| panic!("could query the leaked objects due to {e}")); let leaks = get_values(leaks);
// flatten the list of returned objects for better output on errors
// it shouldn't change the result since each row only has a single
// non-null element anyway.
// NOTE(review): element type lost in extraction — presumably `Vec<String>`.
let leaks: Vec = leaks .into_iter() .flat_map(Vec::into_iter) .flatten() .collect(); assert!( leaks.is_empty(), "objects reference the old binary: {:#?}", &*leaks, ) }
// Return the installed timescaledb_toolkit version from pg_extension.
#[must_use] fn get_installed_extension_version(&mut self) -> String { let get_extension_version = "\ SELECT extversion \ FROM pg_extension \ WHERE extname = 'timescaledb_toolkit'"; let updated_version = self .simple_query(get_extension_version) .unwrap_or_else(|e| panic!("could get updated extension version due to {e}")); get_values(updated_version) .pop()
// should have 1 row
.expect("no timescaledb_toolkit version") .pop()
// row should have one value
.expect("no timescaledb_toolkit version") .expect("no timescaledb_toolkit version") }
// Probe every stabilized function/type/operator via a regprocedure/regtype/
// regoperator cast; a missing object makes the cast fail and we panic.
pub(crate) fn validate_stable_objects_exist(&mut self) { for function in stabilization::STABLE_FUNCTIONS { let check_existence = format!("SELECT '{function}'::regprocedure;"); self.simple_query(&check_existence) .unwrap_or_else(|e| panic!("error checking function existence: {e}")); } for ty in stabilization::STABLE_TYPES { let check_existence = format!("SELECT '{ty}'::regtype;"); self.simple_query(&check_existence) .unwrap_or_else(|e| panic!("error checking type existence: {e}")); } for operator in stabilization::STABLE_OPERATORS() { let check_existence = format!("SELECT '{operator}'::regoperator;");
self.simple_query(&check_existence) .unwrap_or_else(|e| panic!("error checking operator existence: {e}")); } } } impl std::ops::Deref for TestClient { type Target = Client; fn deref(&self) -> &Self::Target { &self.0 } } impl std::ops::DerefMut for TestClient { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } }
// translate a messages into their contained values
// NOTE(review): parameter generic lost in extraction — presumably
// `Vec<SimpleQueryMessage>`.
fn get_values(query_results: Vec) -> QueryValues { query_results .into_iter() .filter_map(|msg| match msg { SimpleQueryMessage::CommandComplete(_) => None, SimpleQueryMessage::Row(row) => { let mut values = Vec::with_capacity(row.len()); for i in 0..row.len() { values.push(row.get(i).map(|s| s.to_string())) } Some(values) } _ => unreachable!(), }) .collect() }
// Functions below this line are originally from sql-doctester/src/runner.rs
// Compare a query's actual rows to `test.output`; on mismatch return an
// OutputError containing expected/received/delta tables.
pub fn validate_output(output: Vec, test: &Test) -> Result<(), TestError> { use SimpleQueryMessage::*; let mut rows = Vec::with_capacity(test.output.len()); for r in output { match r { Row(r) => { let mut row: Vec = Vec::with_capacity(r.len()); for i in 0..r.len() { row.push(r.get(i).unwrap_or("").to_string()) } rows.push(row); } CommandComplete(_) => {} _ => unreachable!(), } } let output_error = |header: &str| { format!( "{}\n{expected}\n{}{}\n\n{received}\n{}{}\n\n{delta}\n{}", header, stringify_table(&test.output), format!("({} rows)", test.output.len()).dimmed(), stringify_table(&rows), format!("({} rows)", rows.len()).dimmed(), stringify_delta(&test.output, &rows), expected = "Expected".bold().blue(), received = "Received".bold().blue(), delta = "Delta".bold().blue(), ) }; if test.output.len() != rows.len() { return Err(TestError::OutputError(output_error( "output has a different number of rows than expected.", ))); }
// truncate a column value to the test's per-column precision limit (if any)
// before comparing, so rounding noise past the limit is ignored
fn clamp_len<'s>(mut col: &'s str, idx: usize, test: &Test) -> &'s str { let max_len = test.precision_limits.get(&idx); if let Some(&max_len) = max_len { if col.len() > max_len { col = &col[..max_len] } } col } let all_eq =
test.output.iter().zip(rows.iter()).all(|(out, row)| { out.len() == row.len() && out .iter() .zip(row.iter()) .enumerate() .all(|(i, (o, r))| clamp_len(o, i, test) == clamp_len(r, i, test)) }); if !all_eq { return Err(TestError::OutputError(output_error( "output has a different values than expected.", ))); } Ok(()) }
// Render rows as a right-aligned, `|`-separated table (column widths sized
// to the widest value in each column).
// NOTE(review): parameter generics lost in extraction — presumably
// `&[Vec<String>]` here and in `stringify_delta`.
fn stringify_table(table: &[Vec]) -> String { use std::{cmp::max, fmt::Write}; if table.is_empty() { return "---".to_string(); } let mut width = vec![0; table[0].len()]; for row in table {
// Ensure that we have width for every column
// TODO this shouldn't be needed, but sometimes is?
if width.len() < row.len() { width.extend((0..row.len() - width.len()).map(|_| 0)); } for (i, value) in row.iter().enumerate() { width[i] = max(width[i], value.len()) } } let mut output = String::with_capacity(width.iter().sum::() + width.len() * 3); for row in table { for (i, value) in row.iter().enumerate() { if i != 0 { output.push_str(" | ") } let _ = write!(&mut output, "{:>width$}", value, width = width[i]); } output.push('\n') } output }
// Render a cell-by-cell diff of two tables: equal cells print as-is,
// differing cells print as colored `-left+right`. Missing rows/cells are
// treated as empty strings.
#[allow(clippy::needless_range_loop)] fn stringify_delta(left: &[Vec], right: &[Vec]) -> String { use std::{cmp::max, fmt::Write}; static EMPTY_ROW: Vec = vec![]; static EMPTY_VAL: String = String::new(); let mut width = vec![ 0; max( left.first().map(Vec::len).unwrap_or(0), right.first().map(Vec::len).unwrap_or(0) ) ]; let num_rows = max(left.len(), right.len());
// first pass: compute column widths (diffed cells need room for both values)
for i in 0..num_rows { let left = left.get(i).unwrap_or(&EMPTY_ROW); let right = right.get(i).unwrap_or(&EMPTY_ROW); let cols = max(left.len(), right.len()); for j in 0..cols { let left = left.get(j).unwrap_or(&EMPTY_VAL); let right = right.get(j).unwrap_or(&EMPTY_VAL); if left == right { width[j] = max(width[j], left.len()) } else { width[j] = max(width[j], left.len() + right.len() + 2) } } } let mut output = String::with_capacity(width.iter().sum::() + width.len() * 3);
// second pass: render
for i in 0..num_rows { let left = left.get(i).unwrap_or(&EMPTY_ROW); let right =
right.get(i).unwrap_or(&EMPTY_ROW); let cols = max(left.len(), right.len()); for j in 0..cols { let left = left.get(j).unwrap_or(&EMPTY_VAL); let right = right.get(j).unwrap_or(&EMPTY_VAL); if j != 0 { let _ = write!(&mut output, " | "); } let (value, padding) = if left == right { (left.to_string(), width[j] - left.len()) } else { let padding = width[j] - (left.len() + right.len() + 2); let value = format!( "{}{}{}{}", "-".magenta(), left.magenta(), "+".yellow(), right.yellow() ); (value, padding) };
// trick to ensure correct padding, the color characters are counted
// if done the normal way.
let _ = write!(&mut output, "{:>padding$}{}", "", value, padding = padding); } let _ = writeln!(&mut output); } output }
// A test failure: either a postgres error or an expected/actual mismatch.
#[derive(Debug)] pub enum TestError { PgError(postgres::Error), OutputError(String), } impl fmt::Display for TestError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { TestError::PgError(error) => {
// NOTE(review): turbofish lost in extraction — presumably `downcast_ref::<DbError>()`
match error.source().and_then(|e| e.downcast_ref::()) { Some(e) => { use postgres::error::ErrorPosition::*; let pos = match e.position() { Some(Original(pos)) => format!("At character {pos}"), Some(Internal { position, query }) => { format!("In internal query `{query}` at {position}") } None => String::new(), }; write!( f, "{}\n{}\n{}\n{}", "Postgres Error:".bold().red(), e, e.detail().unwrap_or(""), pos, ) } None => write!(f, "{error}"), } } TestError::OutputError(err) => write!(f, "{} {err}", "Error:".bold().red()), } } } impl From for TestError { fn from(error: postgres::Error) -> Self { TestError::PgError(error) } } impl TestError {
// Insert a bright-red `~>` marker into `sql` at the error position, if the
// error carries one; otherwise return the SQL unchanged (borrowed).
pub fn annotate_position<'s>(&self, sql: &'s str) -> Cow<'s, str> { match self.location() { None => sql.into(), Some(pos) => format!( "{}{}{}", &sql[..pos as usize], "~>".bright_red(), &sql[pos as usize..], ) .into(), } }
// 0-based character offset of an original-query error position, if any.
// NOTE(review): return generic lost in extraction — presumably `Option<u32>`.
fn location(&self) -> Option { use postgres::error::ErrorPosition::*; match self { TestError::OutputError(..)
=> None, TestError::PgError(e) => match e .source() .and_then(|e| e.downcast_ref::().and_then(DbError::position)) { None => None, Some(Internal { .. }) => None, Some(Original(pos)) => Some(pos.saturating_sub(1)), }, } } }
// Execute a test's SQL and compare the result to its expected output.
fn run_test(client: &mut Client, test: &Test) -> Result<(), TestError> { let output = client.simple_query(&test.text)?; validate_output(output, test) }