Repository: segmentio/parquet-go Branch: main Commit: 5d42db8f0d47 Files: 321 Total size: 1.5 MB Directory structure: gitextract_ou8jci6u/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── test.yml ├── .gitignore ├── .mailmap ├── .words ├── AUTHORS.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── allocator.go ├── array.go ├── array_go18.go ├── bitmap.go ├── bloom/ │ ├── block.go │ ├── block_amd64.go │ ├── block_amd64.s │ ├── block_default.go │ ├── block_optimized.go │ ├── block_test.go │ ├── bloom.go │ ├── bloom_test.go │ ├── filter.go │ ├── filter_amd64.go │ ├── filter_amd64.s │ ├── filter_default.go │ ├── filter_test.go │ ├── hash.go │ └── xxhash/ │ ├── LICENSE │ ├── sum64uint.go │ ├── sum64uint_amd64.go │ ├── sum64uint_amd64.s │ ├── sum64uint_purego.go │ ├── sum64uint_test.go │ ├── xxhash.go │ ├── xxhash_amd64.go │ ├── xxhash_amd64.s │ ├── xxhash_purego.go │ └── xxhash_test.go ├── bloom.go ├── bloom_test.go ├── buffer.go ├── buffer_go18.go ├── buffer_go18_test.go ├── buffer_internal_test.go ├── buffer_pool.go ├── buffer_pool_test.go ├── buffer_test.go ├── column.go ├── column_buffer.go ├── column_buffer_amd64.go ├── column_buffer_amd64.s ├── column_buffer_go18.go ├── column_buffer_purego.go ├── column_buffer_test.go ├── column_chunk.go ├── column_index.go ├── column_index_internal_test.go ├── column_index_test.go ├── column_mapping.go ├── column_mapping_test.go ├── column_path.go ├── column_test.go ├── compare.go ├── compare_test.go ├── compress/ │ ├── brotli/ │ │ └── brotli.go │ ├── compress.go │ ├── compress_test.go │ ├── gzip/ │ │ └── gzip.go │ ├── lz4/ │ │ └── lz4.go │ ├── snappy/ │ │ └── snappy.go │ ├── uncompressed/ │ │ └── uncompressed.go │ └── zstd/ │ └── zstd.go ├── compress.go ├── config.go ├── convert.go ├── convert_test.go ├── dedupe.go ├── dedupe_test.go ├── deprecated/ │ ├── int96.go │ ├── int96_test.go │ └── parquet.go ├── dictionary.go ├── dictionary_amd64.go ├── dictionary_amd64.s ├── 
dictionary_purego.go ├── dictionary_test.go ├── encoding/ │ ├── bitpacked/ │ │ ├── bitpacked.go │ │ └── bitpacked_test.go │ ├── bytestreamsplit/ │ │ ├── bytestreamsplit.go │ │ ├── bytestreamsplit_amd64.go │ │ ├── bytestreamsplit_amd64.s │ │ ├── bytestreamsplit_purego.go │ │ └── bytestreamsplit_test.go │ ├── delta/ │ │ ├── binary_packed.go │ │ ├── binary_packed_amd64.go │ │ ├── binary_packed_amd64.s │ │ ├── binary_packed_amd64_test.go │ │ ├── binary_packed_purego.go │ │ ├── binary_packed_test.go │ │ ├── byte_array.go │ │ ├── byte_array_amd64.go │ │ ├── byte_array_amd64.s │ │ ├── byte_array_purego.go │ │ ├── byte_array_test.go │ │ ├── delta.go │ │ ├── delta_amd64.go │ │ ├── delta_amd64.s │ │ ├── delta_test.go │ │ ├── length_byte_array.go │ │ ├── length_byte_array_amd64.go │ │ ├── length_byte_array_amd64.s │ │ ├── length_byte_array_purego.go │ │ ├── length_byte_array_test.go │ │ └── testdata/ │ │ └── fuzz/ │ │ └── FuzzDeltaByteArray/ │ │ ├── 2404234dd7e87c04303eb7e58208d5b2ccb04fb616c18f3254e2375c4bc327e3 │ │ ├── 4cf9c92e5a2096e3d6c42eaf9b1e31d2567854d33e06c8d2d7a8c46437345850 │ │ ├── 9b210529f5e34e2dea5824929bf0d8242dc9c3165c0dce10bb376c50e21b38cc │ │ └── fbe137144bcda3a149c8ea109703f3242192c5480ea1e82dde0ea24e94f3afef │ ├── encoding.go │ ├── encoding_test.go │ ├── fuzz/ │ │ └── fuzz.go │ ├── notsupported.go │ ├── plain/ │ │ ├── dictionary.go │ │ ├── plain.go │ │ └── plain_test.go │ ├── rle/ │ │ ├── dictionary.go │ │ ├── rle.go │ │ ├── rle_amd64.go │ │ ├── rle_amd64.s │ │ ├── rle_amd64_test.go │ │ ├── rle_purego.go │ │ ├── rle_test.go │ │ └── testdata/ │ │ └── fuzz/ │ │ ├── FuzzEncodeBoolean/ │ │ │ ├── 6be5e340694798c2e5b94c758f0262edd2edf8af5795d4c6c60f6e02643bbb96 │ │ │ └── 9772b3f21a6f61810fe38d120bcc9da6d78540f22dc819a4201283608671fdf4 │ │ ├── FuzzEncodeInt32/ │ │ │ └── 06ba4bdb19de593e669c642987e270fe2488d4d58ecd712db136a3e011071253 │ │ └── FuzzEncodeLevels/ │ │ └── 0468684de48f926219bfc47be13ddf085b5a0ed9fbd9c40a005641b253e88d33 │ ├── test/ │ │ ├── test_go17.go 
│ │ └── test_go18.go │ ├── values.go │ └── values_test.go ├── encoding.go ├── errors.go ├── example_test.go ├── file.go ├── file_test.go ├── filter.go ├── filter_test.go ├── format/ │ ├── parquet.go │ └── parquet_test.go ├── go.mod ├── go.sum ├── hashprobe/ │ ├── aeshash/ │ │ ├── aeshash.go │ │ ├── aeshash_amd64.go │ │ ├── aeshash_amd64.s │ │ ├── aeshash_purego.go │ │ └── aeshash_test.go │ ├── hashprobe.go │ ├── hashprobe_amd64.go │ ├── hashprobe_amd64.s │ ├── hashprobe_purego.go │ ├── hashprobe_test.go │ └── wyhash/ │ ├── wyhash.go │ ├── wyhash_amd64.go │ ├── wyhash_amd64.s │ ├── wyhash_purego.go │ └── wyhash_test.go ├── internal/ │ ├── bitpack/ │ │ ├── bitpack.go │ │ ├── masks_int32_amd64.s │ │ ├── pack.go │ │ ├── unpack.go │ │ ├── unpack_int32_amd64.go │ │ ├── unpack_int32_amd64.s │ │ ├── unpack_int32_purego.go │ │ ├── unpack_int64_amd64.go │ │ ├── unpack_int64_amd64.s │ │ ├── unpack_int64_purego.go │ │ └── unpack_test.go │ ├── bytealg/ │ │ ├── broadcast_amd64.go │ │ ├── broadcast_amd64.s │ │ ├── broadcast_purego.go │ │ ├── broadcast_test.go │ │ ├── bytealg.go │ │ ├── bytealg_amd64.go │ │ ├── bytealg_test.go │ │ ├── count_amd64.go │ │ ├── count_amd64.s │ │ ├── count_purego.go │ │ └── count_test.go │ ├── debug/ │ │ ├── debug.go │ │ ├── finalizer_off.go │ │ └── finalizer_on.go │ ├── quick/ │ │ └── quick.go │ └── unsafecast/ │ ├── unsafecast_go17.go │ ├── unsafecast_go18.go │ └── unsafecast_go18_test.go ├── level.go ├── limits.go ├── merge.go ├── merge_test.go ├── multi_row_group.go ├── node.go ├── null.go ├── null_amd64.go ├── null_amd64.s ├── null_purego.go ├── null_test.go ├── offset_index.go ├── order.go ├── order_amd64.go ├── order_amd64.s ├── order_purego.go ├── order_test.go ├── page.go ├── page_bounds.go ├── page_bounds_amd64.go ├── page_bounds_amd64.s ├── page_bounds_purego.go ├── page_bounds_test.go ├── page_header.go ├── page_max.go ├── page_max_amd64.go ├── page_max_amd64.s ├── page_max_purego.go ├── page_max_test.go ├── page_min.go ├── 
page_min_amd64.go ├── page_min_amd64.s ├── page_min_purego.go ├── page_min_test.go ├── page_test.go ├── page_values.go ├── parquet.go ├── parquet_amd64.go ├── parquet_go18.go ├── parquet_go18_test.go ├── parquet_test.go ├── print.go ├── print_test.go ├── reader.go ├── reader_go18.go ├── reader_go18_test.go ├── reader_test.go ├── row.go ├── row_buffer.go ├── row_buffer_test.go ├── row_builder.go ├── row_builder_test.go ├── row_group.go ├── row_group_test.go ├── row_test.go ├── scan.go ├── scan_test.go ├── schema.go ├── schema_test.go ├── search.go ├── search_test.go ├── sorting.go ├── sorting_test.go ├── sparse/ │ ├── array.go │ ├── gather.go │ ├── gather_amd64.go │ ├── gather_amd64.s │ ├── gather_purego.go │ ├── gather_test.go │ └── sparse.go ├── testdata/ │ ├── alltypes_dictionary.parquet │ ├── alltypes_plain.parquet │ ├── alltypes_plain.snappy.parquet │ ├── alltypes_tiny_pages.parquet │ ├── alltypes_tiny_pages_plain.parquet │ ├── binary.parquet │ ├── byte_array_decimal.parquet │ ├── cluster_test_table_1.snappy.parquet │ ├── cluster_test_table_2.snappy.parquet │ ├── covid.snappy.parquet │ ├── data_index_bloom_encoding_stats.parquet │ ├── datapage_v2.snappy.parquet │ ├── delta_binary_packed.parquet │ ├── delta_byte_array.parquet │ ├── delta_encoding_optional_column.parquet │ ├── delta_encoding_required_column.parquet │ ├── delta_length_byte_array.parquet │ ├── dict-page-offset-zero.parquet │ ├── dms_test_table_LOAD00000001.parquet │ ├── empty.parquet │ ├── file.parquet │ ├── fixed_length_decimal.parquet │ ├── fixed_length_decimal_legacy.parquet │ ├── int32_decimal.parquet │ ├── int64_decimal.parquet │ ├── issue368.parquet │ ├── list_columns.parquet │ ├── lz4_raw_compressed.parquet │ ├── lz4_raw_compressed_larger.parquet │ ├── nested_lists.snappy.parquet │ ├── nested_maps.snappy.parquet │ ├── nested_structs.rust.parquet │ ├── nonnullable.impala.parquet │ ├── null_list.parquet │ ├── nullable.impala.parquet │ ├── nulls.snappy.parquet │ ├── 
repeated_no_annotation.parquet │ ├── rle_boolean_encoding.parquet │ ├── single_nan.parquet │ ├── small.parquet │ └── trace.snappy.parquet ├── transform.go ├── transform_test.go ├── type.go ├── value.go ├── value_amd64.go ├── value_amd64.s ├── value_go17.go ├── value_go18.go ├── value_test.go ├── values_purego.go ├── writer.go ├── writer_go18.go ├── writer_go18_test.go └── writer_test.go ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ internal/gen-go/* linguist-generated=true ================================================ FILE: .github/workflows/test.yml ================================================ name: Test on: push: branches: - main pull_request: branches: - '*' jobs: test: strategy: matrix: go: - '1.17.x' - '1.18.x' - '1.19.x' tags: - '' - purego label: - [self-hosted, linux, arm64, segment] - ubuntu-latest runs-on: ${{ matrix.label }} env: PARQUETGODEBUG: tracebuf=1 steps: - uses: actions/checkout@v3 - name: Setup Go ${{ matrix.go }} uses: actions/setup-go@v3 with: go-version: ${{ matrix.go }} - name: Download Dependencies run: go mod download - name: Run Tests run: go test -trimpath -race -tags=${{ matrix.tags }} ./... - name: Run Benchmarks run: go test -trimpath -short -tags=${{ matrix.tags }} -run '^$' -bench . -benchtime 1x ./... 
format: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Setup Go ${{ matrix.go }} uses: actions/setup-go@v3 with: go-version: 1.19.x - name: Validate formatting run: make format # https://github.com/golangci/golangci-lint/issues/2649 # lint: # runs-on: ubuntu-latest # steps: # - uses: actions/checkout@v3 # - uses: actions/setup-go@v3 # with: # go-version: 1.18.x # - name: golangci-lint # uses: golangci/golangci-lint-action@v3 # with: # version: latest ================================================ FILE: .gitignore ================================================ # Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib # Test binary, built with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out # Dependency directories (remove the comment below to include it) # vendor/ # Emacs *~ #*# .# ================================================ FILE: .mailmap ================================================ Achille Roussel Achille Thomas Pelletier Thomas Pelletier ================================================ FILE: .words ================================================ RowType Twilio bottlenecked decompressors int96 millis nanos reindexing repositions schemas ColumnPages PageIndex Zstandard xxHash cardinality enums 32bit dic Blart Versenwald purego stdlib unscaled cespare bitset checksumming ================================================ FILE: AUTHORS.txt ================================================ Achille Roussel Frederic Branczyk Julien Fabre Kevin Burke Thomas Pelletier ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, 
ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: - Using welcoming and inclusive language - Being respectful of differing viewpoints and experiences - Gracefully accepting constructive criticism - Focusing on what is best for the community - Showing empathy towards other community members Examples of unacceptable behavior by participants include: - The use of sexualized language or imagery and unwelcome sexual attention or advances - Trolling, insulting/derogatory comments, and personal or political attacks - Public or private harassment - Publishing others' private information, such as a physical or electronic address, without explicit permission - Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at open-source@twilio.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to segmentio/parquet ## Code of Conduct Help us keep the project open and inclusive. Please be kind to and considerate of other developers, as we all have the same goal: make the project as good as it can be. * [Code of Conduct](./CODE_OF_CONDUCT.md) ## Licensing All third party contributors acknowledge that any contributions they provide will be made under the same open source license that the open source project is provided under. ## Contributing * Open an Issue to report bugs or discuss non-trivial changes. * Open a Pull Request to submit a code change for review. ### Coding Rules To ensure consistency throughout the source code, keep these rules in mind when submitting contributions: * All features or bug fixes must be tested by one or more tests. * All exported types, functions, and symbols must be documented. * All code must be formatted with `go fmt`. 
================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. 
For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -------------------------------------------------------------------------------- This product includes code from Apache Parquet. * deprecated/parquet.go is based on Apache Parquet's thrift file * format/parquet.go is based on Apache Parquet's thrift file Copyright: 2014 The Apache Software Foundation. Home page: https://github.com/apache/parquet-format License: http://www.apache.org/licenses/LICENSE-2.0 ================================================ FILE: Makefile ================================================ .PHONY: format AUTHORS.txt: .mailmap go install github.com/kevinburke/write_mailmap@latest write_mailmap > AUTHORS.txt format: go install github.com/kevinburke/differ@latest differ gofmt -w . test: go test -v -trimpath -race -tags= ./... 
================================================ FILE: README.md ================================================ # Project has been Archived Development has moved to https://github.com/parquet-go/parquet-go. No API's have changed, we just decided to create a new organization for this library. Thank you to all of the contributors for your hard work. # segmentio/parquet-go High-performance Go library to manipulate parquet files. ================================================ FILE: allocator.go ================================================ package parquet import "github.com/segmentio/parquet-go/internal/unsafecast" type allocator struct{ buffer []byte } func (a *allocator) makeBytes(n int) []byte { if free := cap(a.buffer) - len(a.buffer); free < n { newCap := 2 * cap(a.buffer) if newCap == 0 { newCap = 4096 } for newCap < n { newCap *= 2 } a.buffer = make([]byte, 0, newCap) } i := len(a.buffer) j := len(a.buffer) + n a.buffer = a.buffer[:j] return a.buffer[i:j:j] } func (a *allocator) copyBytes(v []byte) []byte { b := a.makeBytes(len(v)) copy(b, v) return b } func (a *allocator) copyString(v string) string { b := a.makeBytes(len(v)) copy(b, v) return unsafecast.BytesToString(b) } func (a *allocator) reset() { a.buffer = a.buffer[:0] } // rowAllocator is a memory allocator used to make a copy of rows referencing // memory buffers that parquet-go does not have ownership of. // // This type is used in the implementation of various readers and writers that // need to capture rows passed to the ReadRows/WriteRows methods. Copies to a // local buffer is necessary in those cases to repect the reader/writer // contracts that do not allow the implementations to retain the rows they // are passed as arguments. 
// // See: RowBuffer, DedupeRowReader, DedupeRowWriter type rowAllocator struct{ allocator } func (a *rowAllocator) capture(row Row) { for i, v := range row { switch v.Kind() { case ByteArray, FixedLenByteArray: row[i].ptr = unsafecast.AddressOfBytes(a.copyBytes(v.byteArray())) } } } ================================================ FILE: array.go ================================================ package parquet import ( "unsafe" "github.com/segmentio/parquet-go/sparse" ) func makeArrayValue(values []Value, offset uintptr) sparse.Array { ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) return sparse.UnsafeArray(unsafe.Add(ptr, offset), len(values), unsafe.Sizeof(Value{})) } func makeArrayString(values []string) sparse.Array { str := "" ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof(str)) } func makeArrayBE128(values []*[16]byte) sparse.Array { ptr := *(*unsafe.Pointer)(unsafe.Pointer(&values)) return sparse.UnsafeArray(ptr, len(values), unsafe.Sizeof((*[16]byte)(nil))) } ================================================ FILE: array_go18.go ================================================ //go:build go1.18 package parquet import ( "unsafe" "github.com/segmentio/parquet-go/internal/unsafecast" "github.com/segmentio/parquet-go/sparse" ) func makeArray(base unsafe.Pointer, length int, offset uintptr) sparse.Array { return sparse.UnsafeArray(base, length, offset) } func makeArrayOf[T any](s []T) sparse.Array { var model T return makeArray(unsafecast.PointerOf(s), len(s), unsafe.Sizeof(model)) } func makeSlice[T any](a sparse.Array) []T { return slice[T](a.Index(0), a.Len()) } func slice[T any](p unsafe.Pointer, n int) []T { return unsafe.Slice((*T)(p), n) } type sliceHeader struct { base unsafe.Pointer len int cap int } ================================================ FILE: bitmap.go ================================================ package parquet import "sync" type bitmap struct { bits []uint64 } func (m 
*bitmap) reset(size int) { size = (size + 63) / 64 if cap(m.bits) < size { m.bits = make([]uint64, size, 2*size) } else { m.bits = m.bits[:size] m.clear() } } func (m *bitmap) clear() { for i := range m.bits { m.bits[i] = 0 } } var ( bitmapPool sync.Pool // *bitmap ) func acquireBitmap(n int) *bitmap { b, _ := bitmapPool.Get().(*bitmap) if b == nil { b = &bitmap{bits: make([]uint64, n, 2*n)} } else { b.reset(n) } return b } func releaseBitmap(b *bitmap) { if b != nil { bitmapPool.Put(b) } } ================================================ FILE: bloom/block.go ================================================ package bloom import "unsafe" // Word represents 32 bits words of bloom filter blocks. type Word uint32 // Block represents bloom filter blocks which contain eight 32 bits words. type Block [8]Word // Bytes returns b as a byte slice. func (b *Block) Bytes() []byte { return unsafe.Slice((*byte)(unsafe.Pointer(b)), BlockSize) } const ( // BlockSize is the size of bloom filter blocks in bytes. BlockSize = 32 salt0 = 0x47b6137b salt1 = 0x44974d91 salt2 = 0x8824ad5b salt3 = 0xa2b7289d salt4 = 0x705495c7 salt5 = 0x2df1424b salt6 = 0x9efc4947 salt7 = 0x5c6bfb31 ) ================================================ FILE: bloom/block_amd64.go ================================================ //go:build !purego package bloom import "golang.org/x/sys/cpu" // The functions in this file are SIMD-optimized versions of the functions // declared in block_optimized.go for x86 targets. 
// // The optimization yields measurable improvements over the pure Go versions: // // goos: darwin // goarch: amd64 // pkg: github.com/segmentio/parquet-go/bloom // cpu: Intel(R) Core(TM) i9-8950HK CPU @ 2.90GHz // // name old time/op new time/op delta // BlockInsert 11.6ns ± 4% 2.0ns ± 3% -82.37% (p=0.000 n=8+8) // BlockCheck 12.6ns ±28% 2.1ns ± 4% -83.12% (p=0.000 n=10+8) // // name old speed new speed delta // BlockInsert 2.73GB/s ±13% 15.70GB/s ± 3% +475.96% (p=0.000 n=9+8) // BlockCheck 2.59GB/s ±23% 15.06GB/s ± 4% +482.25% (p=0.000 n=10+8) // // Note that the numbers above are a comparison to the routines implemented in // block_optimized.go; the delta comparing to functions in block_default.go is // significantly larger but not very interesting since those functions have no // practical use cases. var hasAVX2 = cpu.X86.HasAVX2 //go:noescape func blockInsert(b *Block, x uint32) //go:noescape func blockCheck(b *Block, x uint32) bool func (b *Block) Insert(x uint32) { blockInsert(b, x) } func (b *Block) Check(x uint32) bool { return blockCheck(b, x) } ================================================ FILE: bloom/block_amd64.s ================================================ //go:build !purego #include "textflag.h" #define salt0 0x47b6137b #define salt1 0x44974d91 #define salt2 0x8824ad5b #define salt3 0xa2b7289d #define salt4 0x705495c7 #define salt5 0x2df1424b #define salt6 0x9efc4947 #define salt7 0x5c6bfb31 DATA ones+0(SB)/4, $1 DATA ones+4(SB)/4, $1 DATA ones+8(SB)/4, $1 DATA ones+12(SB)/4, $1 DATA ones+16(SB)/4, $1 DATA ones+20(SB)/4, $1 DATA ones+24(SB)/4, $1 DATA ones+28(SB)/4, $1 GLOBL ones(SB), RODATA|NOPTR, $32 DATA salt+0(SB)/4, $salt0 DATA salt+4(SB)/4, $salt1 DATA salt+8(SB)/4, $salt2 DATA salt+12(SB)/4, $salt3 DATA salt+16(SB)/4, $salt4 DATA salt+20(SB)/4, $salt5 DATA salt+24(SB)/4, $salt6 DATA salt+28(SB)/4, $salt7 GLOBL salt(SB), RODATA|NOPTR, $32 // This initial block is a SIMD implementation of the mask function declared in // block_default.go 
and block_optimized.go. For each of the 8 x 32 bits words of // the bloom filter block, the operation performed is: // // block[i] = 1 << ((x * salt[i]) >> 27) // // Arguments // --------- // // * src is a memory location where the value to use when computing the mask is // located. The memory location is not modified. // // * tmp is a YMM register used as scratch space to hold intermediary results in // the algorithm. // // * dst is a YMM register where the final mask is written. // #define generateMask(src, tmp, dst) \ VMOVDQA ones(SB), dst \ VPBROADCASTD src, tmp \ VPMULLD salt(SB), tmp, tmp \ VPSRLD $27, tmp, tmp \ VPSLLVD tmp, dst, dst #define insert(salt, src, dst) \ MOVL src, CX \ IMULL salt, CX \ SHRL $27, CX \ MOVL $1, DX \ SHLL CX, DX \ ORL DX, dst #define check(salt, b, x) \ MOVL b, CX \ MOVL x, DX \ IMULL salt, DX \ SHRL $27, DX \ BTL DX, CX \ JAE notfound // func blockInsert(b *Block, x uint32) TEXT ·blockInsert(SB), NOSPLIT, $0-16 MOVQ b+0(FP), AX CMPB ·hasAVX2(SB), $0 JE fallback avx2: generateMask(x+8(FP), Y1, Y0) // Set all 1 bits of the mask in the bloom filter block. VPOR (AX), Y0, Y0 VMOVDQU Y0, (AX) VZEROUPPER RET fallback: MOVL x+8(FP), BX insert($salt0, BX, 0(AX)) insert($salt1, BX, 4(AX)) insert($salt2, BX, 8(AX)) insert($salt3, BX, 12(AX)) insert($salt4, BX, 16(AX)) insert($salt5, BX, 20(AX)) insert($salt6, BX, 24(AX)) insert($salt7, BX, 28(AX)) RET // func blockCheck(b *Block, x uint32) bool TEXT ·blockCheck(SB), NOSPLIT, $0-17 MOVQ b+0(FP), AX CMPB ·hasAVX2(SB), $0 JE fallback avx2: generateMask(x+8(FP), Y1, Y0) // Compare the 1 bits of the mask with the bloom filter block, then compare // the result with the mask, expecting equality if the value `x` was present // in the block. 
// There are no practical reasons to enable the parquet.bloom.no_unroll build
// tag, the code is left here as a reference to ensure that the optimized
// implementations of block operations behave the same as the functions in this
// file.
var salt = [8]uint32{ 0: salt0, 1: salt1, 2: salt2, 3: salt3, 4: salt4, 5: salt5, 6: salt6, 7: salt7, } func (w *Word) set(i uint) { *w |= Word(1 << i) } func (w Word) has(i uint) bool { return ((w >> Word(i)) & 1) != 0 } func mask(x uint32) Block { var b Block for i := uint(0); i < 8; i++ { y := x * salt[i] b[i].set(uint(y) >> 27) } return b } func (b *Block) Insert(x uint32) { masked := mask(x) for i := uint(0); i < 8; i++ { for j := uint(0); j < 32; j++ { if masked[i].has(j) { b[i].set(j) } } } } func (b *Block) Check(x uint32) bool { masked := mask(x) for i := uint(0); i < 8; i++ { for j := uint(0); j < 32; j++ { if masked[i].has(j) { if !b[i].has(j) { return false } } } } return true } ================================================ FILE: bloom/block_optimized.go ================================================ //go:build (!amd64 || purego) && !parquet.bloom.no_unroll package bloom // The functions in this file are optimized versions of the algorithms described // in https://github.com/apache/parquet-format/blob/master/BloomFilter.md // // The functions are manual unrolling of the loops, which yield significant // performance improvements: // // goos: darwin // goarch: amd64 // pkg: github.com/segmentio/parquet-go/bloom // cpu: Intel(R) Core(TM) i9-8950HK CPU @ 2.90GHz // // name old time/op new time/op delta // BlockInsert 327ns ± 1% 12ns ± 4% -96.47% (p=0.000 n=9+8) // BlockCheck 240ns ± 4% 13ns ±28% -94.75% (p=0.000 n=8+10) // // name old speed new speed delta // BlockInsert 97.8MB/s ± 1% 2725.0MB/s ±13% +2686.59% (p=0.000 n=9+9) // BlockCheck 133MB/s ± 4% 2587MB/s ±23% +1838.46% (p=0.000 n=8+10) // // The benchmarks measure throughput based on the byte size of a bloom filter // block. 
func (b *Block) Insert(x uint32) { b[0] |= 1 << ((x * salt0) >> 27) b[1] |= 1 << ((x * salt1) >> 27) b[2] |= 1 << ((x * salt2) >> 27) b[3] |= 1 << ((x * salt3) >> 27) b[4] |= 1 << ((x * salt4) >> 27) b[5] |= 1 << ((x * salt5) >> 27) b[6] |= 1 << ((x * salt6) >> 27) b[7] |= 1 << ((x * salt7) >> 27) } func (b *Block) Check(x uint32) bool { return ((b[0] & (1 << ((x * salt0) >> 27))) != 0) && ((b[1] & (1 << ((x * salt1) >> 27))) != 0) && ((b[2] & (1 << ((x * salt2) >> 27))) != 0) && ((b[3] & (1 << ((x * salt3) >> 27))) != 0) && ((b[4] & (1 << ((x * salt4) >> 27))) != 0) && ((b[5] & (1 << ((x * salt5) >> 27))) != 0) && ((b[6] & (1 << ((x * salt6) >> 27))) != 0) && ((b[7] & (1 << ((x * salt7) >> 27))) != 0) } func (f SplitBlockFilter) insertBulk(x []uint64) { for i := range x { f.Insert(x[i]) } } ================================================ FILE: bloom/block_test.go ================================================ package bloom_test import ( "math" "testing" "github.com/segmentio/parquet-go/bloom" ) func TestBlock(t *testing.T) { for i := uint64(0); i < math.MaxUint32; i = (i * 2) + 1 { x := uint32(i) b := bloom.Block{} b.Insert(x) if !b.Check(x) { t.Fatalf("bloom filter block does not contain the value that was inserted: %d", x) } if b.Check(x - 1) { t.Fatalf("bloom filter block contains value that was not inserted: %d", ^x) } if b.Check(x + 1) { t.Fatalf("bloom filter block contains value that was not inserted: %d", ^x) } if b.Check(^x) { t.Fatalf("bloom filter block contains value that was not inserted: %d", ^x) } } } func BenchmarkBlockInsert(b *testing.B) { x := bloom.Block{} for i := 0; i < b.N; i++ { x.Insert(uint32(i)) } b.SetBytes(bloom.BlockSize) } func BenchmarkBlockCheck(b *testing.B) { x := bloom.Block{} x.Insert(42) for i := 0; i < b.N; i++ { x.Check(42) } b.SetBytes(bloom.BlockSize) } ================================================ FILE: bloom/bloom.go ================================================ // Package bloom implements parquet bloom filters. 
// fasthash1x64 maps the upper 32 bits of value onto the range [0, scale)
// using a multiply-shift reduction instead of a modulo.
func fasthash1x64(value uint64, scale int32) uint64 {
	return ((value >> 32) * uint64(scale)) >> 32
}

// fasthash4x64 applies the same reduction as fasthash1x64 to the four values
// of src, writing the results to dst.
func fasthash4x64(dst, src *[4]uint64, scale int32) {
	s := uint64(scale)
	for i := range src {
		dst[i] = ((src[i] >> 32) * s) >> 32
	}
}
func MakeSplitBlockFilter(data []byte) SplitBlockFilter { p := *(*unsafe.Pointer)(unsafe.Pointer(&data)) n := len(data) / BlockSize return unsafe.Slice((*Block)(p), n) } // NumSplitBlocksOf returns the number of blocks in a filter intended to hold // the given number of values and bits of filter per value. // // This function is useful to determine the number of blocks when creating bloom // filters in memory, for example: // // f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(n, 10)) func NumSplitBlocksOf(numValues int64, bitsPerValue uint) int { numBytes := ((uint(numValues) * bitsPerValue) + 7) / 8 numBlocks := (numBytes + (BlockSize - 1)) / BlockSize return int(numBlocks) } // Reset clears the content of the filter f. func (f SplitBlockFilter) Reset() { for i := range f { f[i] = Block{} } } // Block returns a pointer to the block that the given value hashes to in the // bloom filter. func (f SplitBlockFilter) Block(x uint64) *Block { return &f[fasthash1x64(x, int32(len(f)))] } // InsertBulk adds all values from x into f. func (f SplitBlockFilter) InsertBulk(x []uint64) { filterInsertBulk(f, x) } // Insert adds x to f. func (f SplitBlockFilter) Insert(x uint64) { filterInsert(f, x) } // Check tests whether x is in f. func (f SplitBlockFilter) Check(x uint64) bool { return filterCheck(f, x) } // Bytes converts f to a byte slice. // // The returned slice shares the memory of f. The method is intended to be used // to serialize the bloom filter to a storage medium. func (f SplitBlockFilter) Bytes() []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&f)), len(f)*BlockSize) } // CheckSplitBlock is similar to bloom.SplitBlockFilter.Check but reads the // bloom filter of n bytes from r. // // The size n of the bloom filter is assumed to be a multiple of the block size. 
func CheckSplitBlock(r io.ReaderAt, n int64, x uint64) (bool, error) { block := acquireBlock() defer releaseBlock(block) offset := BlockSize * fasthash1x64(x, int32(n/BlockSize)) _, err := r.ReadAt(block.Bytes(), int64(offset)) return block.Check(uint32(x)), err } var ( blockPool sync.Pool ) func acquireBlock() *Block { b, _ := blockPool.Get().(*Block) if b == nil { b = new(Block) } return b } func releaseBlock(b *Block) { if b != nil { blockPool.Put(b) } } ================================================ FILE: bloom/filter_amd64.go ================================================ //go:build !purego package bloom // This file contains the signatures for bloom filter algorithms implemented in // filter_amd64.s. // // The assembly code provides significant speedups on filter inserts and checks, // with the greatest gains seen on the bulk insert operation where the use of // vectorized code yields great results. // // The following sections record the kind of performance improvements we were // able to measure, comparing with performing the filter block lookups in Go // and calling to the block insert and check routines: // // name old time/op new time/op delta // FilterInsertBulk 45.1ns ± 2% 17.8ns ± 3% -60.41% (p=0.000 n=10+10) // FilterInsert 3.48ns ± 2% 2.55ns ± 1% -26.86% (p=0.000 n=10+8) // FilterCheck 3.64ns ± 3% 2.66ns ± 2% -26.82% (p=0.000 n=10+9) // // name old speed new speed delta // FilterInsertBulk 11.4GB/s ± 2% 28.7GB/s ± 3% +152.61% (p=0.000 n=10+10) // FilterInsert 9.19GB/s ± 2% 12.56GB/s ± 1% +36.71% (p=0.000 n=10+8) // FilterCheck 8.80GB/s ± 3% 12.03GB/s ± 2% +36.61% (p=0.000 n=10+9) //go:noescape func filterInsertBulk(f []Block, x []uint64) //go:noescape func filterInsert(f []Block, x uint64) //go:noescape func filterCheck(f []Block, x uint64) bool ================================================ FILE: bloom/filter_amd64.s ================================================ //go:build !purego #include "textflag.h" #define salt0 0x47b6137b #define 
salt1 0x44974d91 #define salt2 0x8824ad5b #define salt3 0xa2b7289d #define salt4 0x705495c7 #define salt5 0x2df1424b #define salt6 0x9efc4947 #define salt7 0x5c6bfb31 // See block_amd64.s for a description of this algorithm. #define generateMask(src, dst) \ VMOVDQA ones(SB), dst \ VPMULLD salt(SB), src, src \ VPSRLD $27, src, src \ VPSLLVD src, dst, dst #define applyMask(src, dst) \ VPOR dst, src, src \ VMOVDQU src, dst #define fasthash1x64(scale, value) \ SHRQ $32, value \ IMULQ scale, value \ SHRQ $32, value \ SHLQ $5, value #define fasthash4x64(scale, value) \ VPSRLQ $32, value, value \ VPMULUDQ scale, value, value \ VPSRLQ $32, value, value \ VPSLLQ $5, value, value #define extract4x64(srcYMM, srcXMM, tmpXMM, r0, r1, r2, r3) \ VEXTRACTI128 $1, srcYMM, tmpXMM \ MOVQ srcXMM, r0 \ VPEXTRQ $1, srcXMM, r1 \ MOVQ tmpXMM, r2 \ VPEXTRQ $1, tmpXMM, r3 #define insert(salt, src, dst) \ MOVL src, CX \ IMULL salt, CX \ SHRL $27, CX \ MOVL $1, DX \ SHLL CX, DX \ ORL DX, dst #define check(salt, b, x) \ MOVL b, CX \ MOVL x, DX \ IMULL salt, DX \ SHRL $27, DX \ BTL DX, CX \ JAE notfound // func filterInsertBulk(f []Block, x []uint64) TEXT ·filterInsertBulk(SB), NOSPLIT, $0-48 MOVQ f_base+0(FP), AX MOVQ f_len+8(FP), CX MOVQ x_base+24(FP), BX MOVQ x_len+32(FP), DX CMPB ·hasAVX2(SB), $0 JE fallback avx2: VPBROADCASTQ f_base+8(FP), Y0 // Loop initialization, SI holds the current index in `x`, DI is the number // of elements in `x` rounded down to the nearest multiple of 4. XORQ SI, SI MOVQ DX, DI SHRQ $2, DI SHLQ $2, DI avx2loop4x64: CMPQ SI, DI JAE avx2loop1x64 // The masks and indexes for 4 input hashes are computed in each loop // iteration. The hashes are loaded in Y1 so we can use vector instructions // to compute all 4 indexes in parallel. The lower 32 bits of the hashes are // also broadcasted in 4 YMM registers to compute the 4 masks that will then // be applied to the filter. 
VMOVDQU (BX)(SI*8), Y1 VPBROADCASTD 0(BX)(SI*8), Y2 VPBROADCASTD 8(BX)(SI*8), Y3 VPBROADCASTD 16(BX)(SI*8), Y4 VPBROADCASTD 24(BX)(SI*8), Y5 fasthash4x64(Y0, Y1) generateMask(Y2, Y6) generateMask(Y3, Y7) generateMask(Y4, Y8) generateMask(Y5, Y9) // The next block of instructions move indexes from the vector to general // purpose registers in order to use them as offsets when applying the mask // to the filter. extract4x64(Y1, X1, X10, R8, R9, R10, R11) // Apply masks to the filter; this operation is sensitive to aliasing, when // blocks overlap the, CPU has to serialize the reads and writes, which has // a measurable impact on throughput. This would be frequent for small bloom // filters which may have only a few blocks, the probability of seeing // overlapping blocks on large filters should be small enough to make this // a non-issue though. applyMask(Y6, (AX)(R8*1)) applyMask(Y7, (AX)(R9*1)) applyMask(Y8, (AX)(R10*1)) applyMask(Y9, (AX)(R11*1)) ADDQ $4, SI JMP avx2loop4x64 avx2loop1x64: // Compute trailing elements in `x` if the length was not a multiple of 4. // This is the same algorithm as the one in the loop4x64 section, working // on a single mask/block pair at a time. 
CMPQ SI, DX JE avx2done MOVQ (BX)(SI*8), R8 VPBROADCASTD (BX)(SI*8), Y0 fasthash1x64(CX, R8) generateMask(Y0, Y1) applyMask(Y1, (AX)(R8*1)) INCQ SI JMP avx2loop1x64 avx2done: VZEROUPPER JMP done fallback: XORQ SI, SI MOVQ DX, DI MOVQ CX, R10 loop: CMPQ SI, DI JE done MOVLQZX (BX)(SI*8), R8 MOVQ (BX)(SI*8), R9 fasthash1x64(R10, R9) insert($salt0, R8, 0(AX)(R9*1)) insert($salt1, R8, 4(AX)(R9*1)) insert($salt2, R8, 8(AX)(R9*1)) insert($salt3, R8, 12(AX)(R9*1)) insert($salt4, R8, 16(AX)(R9*1)) insert($salt5, R8, 20(AX)(R9*1)) insert($salt6, R8, 24(AX)(R9*1)) insert($salt7, R8, 28(AX)(R9*1)) INCQ SI JMP loop done: RET // func filterInsert(f []Block, x uint64) TEXT ·filterInsert(SB), NOSPLIT, $0-32 MOVQ f_base+0(FP), AX MOVQ f_len+8(FP), BX MOVQ x+24(FP), CX fasthash1x64(BX, CX) CMPB ·hasAVX2(SB), $0 JE fallback avx2: VPBROADCASTD x+24(FP), Y1 generateMask(Y1, Y0) applyMask(Y0, (AX)(CX*1)) VZEROUPPER RET fallback: ADDQ CX, AX MOVL x+24(FP), BX insert($salt0, BX, 0(AX)) insert($salt1, BX, 4(AX)) insert($salt2, BX, 8(AX)) insert($salt3, BX, 12(AX)) insert($salt4, BX, 16(AX)) insert($salt5, BX, 20(AX)) insert($salt6, BX, 24(AX)) insert($salt7, BX, 28(AX)) RET // func filterCheck(f []Block, x uint64) bool TEXT ·filterCheck(SB), NOSPLIT, $0-33 MOVQ f_base+0(FP), AX MOVQ f_len+8(FP), BX MOVQ x+24(FP), CX fasthash1x64(BX, CX) CMPB ·hasAVX2(SB), $0 JE fallback avx2: VPBROADCASTD x+24(FP), Y1 generateMask(Y1, Y0) VPAND (AX)(CX*1), Y0, Y1 VPTEST Y0, Y1 SETCS ret+32(FP) VZEROUPPER RET fallback: ADDQ CX, AX MOVL x+24(FP), BX check($salt0, 0(AX), BX) check($salt1, 4(AX), BX) check($salt2, 8(AX), BX) check($salt3, 12(AX), BX) check($salt4, 16(AX), BX) check($salt5, 20(AX), BX) check($salt6, 24(AX), BX) check($salt7, 28(AX), BX) MOVB $1, CX JMP done notfound: XORB CX, CX done: MOVB CX, ret+32(FP) RET ================================================ FILE: bloom/filter_default.go ================================================ //go:build purego || !amd64 package bloom func 
filterInsertBulk(f []Block, x []uint64) { for i := range x { filterInsert(f, x[i]) } } func filterInsert(f []Block, x uint64) { f[fasthash1x64(x, int32(len(f)))].Insert(uint32(x)) } func filterCheck(f []Block, x uint64) bool { return f[fasthash1x64(x, int32(len(f)))].Check(uint32(x)) } ================================================ FILE: bloom/filter_test.go ================================================ package bloom_test import ( "bytes" "math/rand" "testing" "github.com/segmentio/parquet-go/bloom" ) func TestSplitBlockFilter(t *testing.T) { const N = 1000 const S = 3 f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(N, 10)) p := rand.New(rand.NewSource(S)) // Half of the values are inserted individually. for i := 0; i < N/2; i++ { f.Insert(p.Uint64()) } // The other half is inserted as a bulk operation. b := make([]uint64, N/2) for i := range b { b[i] = p.Uint64() } f.InsertBulk(b) if f.Block(0) == nil { t.Fatal("looking up filter block returned impossible nil value") } for _, test := range []struct { scenario string filter bloom.Filter }{ {scenario: "filter", filter: f}, {scenario: "reader", filter: newSerializedFilter(f.Bytes())}, } { t.Run(test.scenario, func(t *testing.T) { p.Seed(S) falsePositives := 0 for i := 0; i < N; i++ { x := p.Uint64() if !test.filter.Check(x) { t.Fatalf("bloom filter block does not contain the value #%d that was inserted: %d", i, x) } if test.filter.Check(^x) { falsePositives++ } } if r := (float64(falsePositives) / N); r > 0.01 { t.Fatalf("bloom filter triggered too many false positives: %g%%", r*100) } }) } t.Run("Reset", func(t *testing.T) { allZeros := true for _, b := range f.Bytes() { if b != 0 { allZeros = false break } } if allZeros { t.Fatal("bloom filter bytes were all zero after inserting keys") } f.Reset() for i, b := range f.Bytes() { if b != 0 { t.Fatalf("bloom filter byte at index %d was not zero after resetting the filter: %02X", i, b) } } }) } func TestSplitBlockFilterBug1(t *testing.T) { // This test 
exercises the case where we bulk insert a single key in the // filter, which skips the core of the optimized assembly routines and runs // through the loop handling tails of remaining keys after consuming groups // of two or more. // // The use of quick.Check in bloom filter tests of the parquet package had // uncovered a bug which was reproduced here in isolation when debugging. h := [1]uint64{0b1000101001000001001001111000000100011011001000011110011100110000} f := make(bloom.SplitBlockFilter, 1) f.InsertBulk(h[:]) if !f.Check(h[0]) { t.Error("value inserted in the filter was not found") } } type serializedFilter struct { bytes.Reader } func (f *serializedFilter) Check(x uint64) bool { ok, _ := bloom.CheckSplitBlock(&f.Reader, f.Size(), x) return ok } func newSerializedFilter(b []byte) *serializedFilter { f := new(serializedFilter) f.Reset(b) return f } func BenchmarkFilterInsertBulk(b *testing.B) { f := make(bloom.SplitBlockFilter, 99) x := make([]uint64, 16) r := rand.NewSource(0).(rand.Source64) for i := range x { x[i] = r.Uint64() } for i := 0; i < b.N; i++ { f.InsertBulk(x) } b.SetBytes(bloom.BlockSize * int64(len(x))) } func BenchmarkFilterInsert(b *testing.B) { f := make(bloom.SplitBlockFilter, 1) for i := 0; i < b.N; i++ { f.Insert(uint64(i)) } b.SetBytes(bloom.BlockSize) } func BenchmarkFilterCheck(b *testing.B) { f := make(bloom.SplitBlockFilter, 1) f.Insert(42) for i := 0; i < b.N; i++ { f.Check(42) } b.SetBytes(bloom.BlockSize) } ================================================ FILE: bloom/hash.go ================================================ package bloom import "github.com/segmentio/parquet-go/bloom/xxhash" // Hash is an interface abstracting the hashing algorithm used in bloom filters. // // Hash instances must be safe to use concurrently from multiple goroutines. type Hash interface { // Returns the 64 bit hash of the value passed as argument. Sum64(value []byte) uint64 // Compute hashes of individual values of primitive types. 
Sum64Uint8(value uint8) uint64 Sum64Uint16(value uint16) uint64 Sum64Uint32(value uint32) uint64 Sum64Uint64(value uint64) uint64 Sum64Uint128(value [16]byte) uint64 // Compute hashes of the array of fixed size values passed as arguments, // returning the number of hashes written to the destination buffer. MultiSum64Uint8(dst []uint64, src []uint8) int MultiSum64Uint16(dst []uint64, src []uint16) int MultiSum64Uint32(dst []uint64, src []uint32) int MultiSum64Uint64(dst []uint64, src []uint64) int MultiSum64Uint128(dst []uint64, src [][16]byte) int } // XXH64 is an implementation of the Hash interface using the XXH64 algorithm. type XXH64 struct{} func (XXH64) Sum64(b []byte) uint64 { return xxhash.Sum64(b) } func (XXH64) Sum64Uint8(v uint8) uint64 { return xxhash.Sum64Uint8(v) } func (XXH64) Sum64Uint16(v uint16) uint64 { return xxhash.Sum64Uint16(v) } func (XXH64) Sum64Uint32(v uint32) uint64 { return xxhash.Sum64Uint32(v) } func (XXH64) Sum64Uint64(v uint64) uint64 { return xxhash.Sum64Uint64(v) } func (XXH64) Sum64Uint128(v [16]byte) uint64 { return xxhash.Sum64Uint128(v) } func (XXH64) MultiSum64Uint8(h []uint64, v []uint8) int { return xxhash.MultiSum64Uint8(h, v) } func (XXH64) MultiSum64Uint16(h []uint64, v []uint16) int { return xxhash.MultiSum64Uint16(h, v) } func (XXH64) MultiSum64Uint32(h []uint64, v []uint32) int { return xxhash.MultiSum64Uint32(h, v) } func (XXH64) MultiSum64Uint64(h []uint64, v []uint64) int { return xxhash.MultiSum64Uint64(h, v) } func (XXH64) MultiSum64Uint128(h []uint64, v [][16]byte) int { return xxhash.MultiSum64Uint128(h, v) } var ( _ Hash = XXH64{} ) ================================================ FILE: bloom/xxhash/LICENSE ================================================ The following files in this directory were derived from the open-source project at https://github.com/cespare/xxhash. A copy of the original license is provided below. 
------------------------------------------------------------------------ Copyright (c) 2016 Caleb Spare MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// Sum64Uint8 returns the 64 bit XXH64 hash (seed 0) of the single byte v.
func Sum64Uint8(v uint8) uint64 {
	h := prime5 + 1 // prime5 + input length in bytes
	h ^= uint64(v) * prime5
	return avalanche(rol11(h) * prime1)
}

// Sum64Uint16 returns the 64 bit XXH64 hash (seed 0) of v encoded as two
// bytes, low byte first.
func Sum64Uint16(v uint16) uint64 {
	h := prime5 + 2
	h ^= uint64(v&0xFF) * prime5 // low byte
	h = rol11(h) * prime1
	h ^= uint64(v>>8) * prime5 // high byte
	h = rol11(h) * prime1
	return avalanche(h)
}

// Sum64Uint32 returns the 64 bit XXH64 hash (seed 0) of v, mixed as a single
// 4 byte lane of the algorithm.
func Sum64Uint32(v uint32) uint64 {
	h := prime5 + 4
	h ^= uint64(v) * prime1
	return avalanche(rol23(h)*prime2 + prime3)
}

// Sum64Uint64 returns the 64 bit XXH64 hash (seed 0) of v, mixed as one
// 8 byte lane of the algorithm.
func Sum64Uint64(v uint64) uint64 {
	h := prime5 + 8
	h ^= round(0, v)
	return avalanche(rol27(h)*prime1 + prime4)
}

// Sum64Uint128 returns the 64 bit XXH64 hash (seed 0) of the 16 bytes of v,
// consumed as two 8 byte words: v[:8] first, then v[8:].
//
// NOTE(review): assumes u64 performs a little-endian load, as in the upstream
// cespare/xxhash code this package is derived from — confirm in xxhash.go.
func Sum64Uint128(v [16]byte) uint64 {
	h := prime5 + 16
	h ^= round(0, u64(v[:8]))
	h = rol27(h)*prime1 + prime4
	h ^= round(0, u64(v[8:]))
	h = rol27(h)*prime1 + prime4
	return avalanche(h)
}
// // The use of SIMD optimization yields measurable throughput increases when // computing multiple hash values in parallel compared to hashing values // individually in loops: // // name old speed new speed delta // MultiSum64Uint8/4KB 4.94GB/s ± 2% 6.82GB/s ± 5% +38.00% (p=0.000 n=10+10) // MultiSum64Uint16/4KB 3.44GB/s ± 2% 4.63GB/s ± 4% +34.56% (p=0.000 n=10+10) // MultiSum64Uint32/4KB 4.84GB/s ± 2% 6.39GB/s ± 4% +31.94% (p=0.000 n=10+10) // MultiSum64Uint64/4KB 3.77GB/s ± 2% 4.95GB/s ± 2% +31.14% (p=0.000 n=9+10) // MultiSum64Uint128/4KB 1.84GB/s ± 2% 3.11GB/s ± 4% +68.70% (p=0.000 n=9+10) // // name old hash/s new hash/s delta // MultiSum64Uint8/4KB 617M ± 2% 852M ± 5% +38.00% (p=0.000 n=10+10) // MultiSum64Uint16/4KB 431M ± 2% 579M ± 4% +34.56% (p=0.000 n=10+10) // MultiSum64Uint32/4KB 605M ± 2% 799M ± 4% +31.94% (p=0.000 n=10+10) // MultiSum64Uint64/4KB 471M ± 2% 618M ± 2% +31.14% (p=0.000 n=9+10) // MultiSum64Uint128/4KB 231M ± 2% 389M ± 4% +68.70% (p=0.000 n=9+10) // // The benchmarks measure the throughput of hashes produced, as a rate of values // and bytes. var hasAVX512 = cpu.X86.HasAVX512 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512CD //go:noescape func MultiSum64Uint8(h []uint64, v []uint8) int //go:noescape func MultiSum64Uint16(h []uint64, v []uint16) int //go:noescape func MultiSum64Uint32(h []uint64, v []uint32) int //go:noescape func MultiSum64Uint64(h []uint64, v []uint64) int //go:noescape func MultiSum64Uint128(h []uint64, v [][16]byte) int ================================================ FILE: bloom/xxhash/sum64uint_amd64.s ================================================ //go:build !purego #include "textflag.h" /* The algorithms in this file are assembly versions of the Go functions in the sum64uint_default.go file. The implementations are mostly direct translations of the Go code to assembly, leveraging SIMD instructions to process chunks of the input variables in parallel at each loop iteration. 
To maximize utilization of the CPU capacity, some of the functions unroll two steps of the vectorized loop per iteration, which yields further throughput because the CPU is able to process some of the instruction from the two steps in parallel due to having no data dependencies between the inputs and outputs. The use of AVX-512 yields a significant increase in throughput on all the algorithms, in most part thanks to the VPMULLQ instructions which compute 8 x 64 bits multiplication. There were no equivalent instruction in AVX2, which required emulating vector multiplication with a combination of 32 bits multiply, additions, shifts, and masks: the amount of instructions and data dependencies resulted in the AVX2 code yielding equivalent performance characteristics for a much higher complexity. The benchmark results below showcase the improvements that the AVX-512 code yields on the XXH64 algorithms: name old speed new speed delta MultiSum64Uint8/4KB 4.97GB/s ± 0% 14.59GB/s ± 1% +193.73% (p=0.000 n=10+10) MultiSum64Uint16/4KB 3.55GB/s ± 0% 9.46GB/s ± 0% +166.20% (p=0.000 n=10+9) MultiSum64Uint32/4KB 4.48GB/s ± 0% 13.93GB/s ± 1% +210.93% (p=0.000 n=10+10) MultiSum64Uint64/4KB 3.57GB/s ± 0% 11.12GB/s ± 1% +211.73% (p=0.000 n=9+10) MultiSum64Uint128/4KB 2.54GB/s ± 0% 6.49GB/s ± 1% +155.69% (p=0.000 n=10+10) name old hash/s new hash/s delta MultiSum64Uint8/4KB 621M ± 0% 1823M ± 1% +193.73% (p=0.000 n=10+10) MultiSum64Uint16/4KB 444M ± 0% 1182M ± 0% +166.20% (p=0.000 n=10+9) MultiSum64Uint32/4KB 560M ± 0% 1742M ± 1% +210.93% (p=0.000 n=10+10) MultiSum64Uint64/4KB 446M ± 0% 1391M ± 1% +211.73% (p=0.000 n=9+10) MultiSum64Uint128/4KB 317M ± 0% 811M ± 1% +155.69% (p=0.000 n=10+10) The functions perform runtime detection of AVX-512 support by testing the value of the xxhash.hasAVX512 variable declared and initialized in sum64uint_amd64.go. Branch mispredictions on those tests are very unlikely since the value is never modified by the application. 
The cost of the comparisons are also amortized by the bulk APIs of the MultiSum64* functions (a single test is required per call). If a bug is suspected in the vectorized code, compiling the program or running the tests with -tags=purego can help verify whether the behavior changes when the program does not use the assembly versions. Maintenance of these functions can be complex; however, the XXH64 algorithm is unlikely to evolve, and the implementations unlikely to change. The tests in sum64uint_test.go compare the outputs of MultiSum64* functions with the reference xxhash.Sum64 function, future maintainers can rely on those tests passing as a guarantee that they have not introduced regressions. */ #define PRIME1 0x9E3779B185EBCA87 #define PRIME2 0xC2B2AE3D27D4EB4F #define PRIME3 0x165667B19E3779F9 #define PRIME4 0x85EBCA77C2B2AE63 #define PRIME5 0x27D4EB2F165667C5 #define prime1 R12 #define prime2 R13 #define prime3 R14 #define prime4 R15 #define prime5 R15 // same as prime4 because they are not used together #define prime1ZMM Z12 #define prime2ZMM Z13 #define prime3ZMM Z14 #define prime4ZMM Z15 #define prime5ZMM Z15 DATA prime1vec<>+0(SB)/8, $PRIME1 DATA prime1vec<>+8(SB)/8, $PRIME1 DATA prime1vec<>+16(SB)/8, $PRIME1 DATA prime1vec<>+24(SB)/8, $PRIME1 DATA prime1vec<>+32(SB)/8, $PRIME1 DATA prime1vec<>+40(SB)/8, $PRIME1 DATA prime1vec<>+48(SB)/8, $PRIME1 DATA prime1vec<>+56(SB)/8, $PRIME1 GLOBL prime1vec<>(SB), RODATA|NOPTR, $64 DATA prime2vec<>+0(SB)/8, $PRIME2 DATA prime2vec<>+8(SB)/8, $PRIME2 DATA prime2vec<>+16(SB)/8, $PRIME2 DATA prime2vec<>+24(SB)/8, $PRIME2 DATA prime2vec<>+32(SB)/8, $PRIME2 DATA prime2vec<>+40(SB)/8, $PRIME2 DATA prime2vec<>+48(SB)/8, $PRIME2 DATA prime2vec<>+56(SB)/8, $PRIME2 GLOBL prime2vec<>(SB), RODATA|NOPTR, $64 DATA prime3vec<>+0(SB)/8, $PRIME3 DATA prime3vec<>+8(SB)/8, $PRIME3 DATA prime3vec<>+16(SB)/8, $PRIME3 DATA prime3vec<>+24(SB)/8, $PRIME3 DATA prime3vec<>+32(SB)/8, $PRIME3 DATA prime3vec<>+40(SB)/8, $PRIME3 DATA 
prime3vec<>+48(SB)/8, $PRIME3 DATA prime3vec<>+56(SB)/8, $PRIME3 GLOBL prime3vec<>(SB), RODATA|NOPTR, $64 DATA prime4vec<>+0(SB)/8, $PRIME4 DATA prime4vec<>+8(SB)/8, $PRIME4 DATA prime4vec<>+16(SB)/8, $PRIME4 DATA prime4vec<>+24(SB)/8, $PRIME4 DATA prime4vec<>+32(SB)/8, $PRIME4 DATA prime4vec<>+40(SB)/8, $PRIME4 DATA prime4vec<>+48(SB)/8, $PRIME4 DATA prime4vec<>+56(SB)/8, $PRIME4 GLOBL prime4vec<>(SB), RODATA|NOPTR, $64 DATA prime5vec<>+0(SB)/8, $PRIME5 DATA prime5vec<>+8(SB)/8, $PRIME5 DATA prime5vec<>+16(SB)/8, $PRIME5 DATA prime5vec<>+24(SB)/8, $PRIME5 DATA prime5vec<>+32(SB)/8, $PRIME5 DATA prime5vec<>+40(SB)/8, $PRIME5 DATA prime5vec<>+48(SB)/8, $PRIME5 DATA prime5vec<>+56(SB)/8, $PRIME5 GLOBL prime5vec<>(SB), RODATA|NOPTR, $64 DATA prime5vec1<>+0(SB)/8, $PRIME5+1 DATA prime5vec1<>+8(SB)/8, $PRIME5+1 DATA prime5vec1<>+16(SB)/8, $PRIME5+1 DATA prime5vec1<>+24(SB)/8, $PRIME5+1 DATA prime5vec1<>+32(SB)/8, $PRIME5+1 DATA prime5vec1<>+40(SB)/8, $PRIME5+1 DATA prime5vec1<>+48(SB)/8, $PRIME5+1 DATA prime5vec1<>+56(SB)/8, $PRIME5+1 GLOBL prime5vec1<>(SB), RODATA|NOPTR, $64 DATA prime5vec2<>+0(SB)/8, $PRIME5+2 DATA prime5vec2<>+8(SB)/8, $PRIME5+2 DATA prime5vec2<>+16(SB)/8, $PRIME5+2 DATA prime5vec2<>+24(SB)/8, $PRIME5+2 DATA prime5vec2<>+32(SB)/8, $PRIME5+2 DATA prime5vec2<>+40(SB)/8, $PRIME5+2 DATA prime5vec2<>+48(SB)/8, $PRIME5+2 DATA prime5vec2<>+56(SB)/8, $PRIME5+2 GLOBL prime5vec2<>(SB), RODATA|NOPTR, $64 DATA prime5vec4<>+0(SB)/8, $PRIME5+4 DATA prime5vec4<>+8(SB)/8, $PRIME5+4 DATA prime5vec4<>+16(SB)/8, $PRIME5+4 DATA prime5vec4<>+24(SB)/8, $PRIME5+4 DATA prime5vec4<>+32(SB)/8, $PRIME5+4 DATA prime5vec4<>+40(SB)/8, $PRIME5+4 DATA prime5vec4<>+48(SB)/8, $PRIME5+4 DATA prime5vec4<>+56(SB)/8, $PRIME5+4 GLOBL prime5vec4<>(SB), RODATA|NOPTR, $64 DATA prime5vec8<>+0(SB)/8, $PRIME5+8 DATA prime5vec8<>+8(SB)/8, $PRIME5+8 DATA prime5vec8<>+16(SB)/8, $PRIME5+8 DATA prime5vec8<>+24(SB)/8, $PRIME5+8 DATA prime5vec8<>+32(SB)/8, $PRIME5+8 DATA prime5vec8<>+40(SB)/8, 
$PRIME5+8 DATA prime5vec8<>+48(SB)/8, $PRIME5+8 DATA prime5vec8<>+56(SB)/8, $PRIME5+8 GLOBL prime5vec8<>(SB), RODATA|NOPTR, $64 DATA prime5vec16<>+0(SB)/8, $PRIME5+16 DATA prime5vec16<>+8(SB)/8, $PRIME5+16 DATA prime5vec16<>+16(SB)/8, $PRIME5+16 DATA prime5vec16<>+24(SB)/8, $PRIME5+16 DATA prime5vec16<>+32(SB)/8, $PRIME5+16 DATA prime5vec16<>+40(SB)/8, $PRIME5+16 DATA prime5vec16<>+48(SB)/8, $PRIME5+16 DATA prime5vec16<>+56(SB)/8, $PRIME5+16 GLOBL prime5vec16<>(SB), RODATA|NOPTR, $64 DATA lowbytemask<>+0(SB)/8, $0xFF DATA lowbytemask<>+8(SB)/8, $0xFF DATA lowbytemask<>+16(SB)/8, $0xFF DATA lowbytemask<>+24(SB)/8, $0xFF DATA lowbytemask<>+32(SB)/8, $0xFF DATA lowbytemask<>+40(SB)/8, $0xFF DATA lowbytemask<>+48(SB)/8, $0xFF DATA lowbytemask<>+56(SB)/8, $0xFF GLOBL lowbytemask<>(SB), RODATA|NOPTR, $64 DATA vpermi2qeven<>+0(SB)/8, $0 DATA vpermi2qeven<>+8(SB)/8, $2 DATA vpermi2qeven<>+16(SB)/8, $4 DATA vpermi2qeven<>+24(SB)/8, $6 DATA vpermi2qeven<>+32(SB)/8, $(1<<3)|0 DATA vpermi2qeven<>+40(SB)/8, $(1<<3)|2 DATA vpermi2qeven<>+48(SB)/8, $(1<<3)|4 DATA vpermi2qeven<>+56(SB)/8, $(1<<3)|6 GLOBL vpermi2qeven<>(SB), RODATA|NOPTR, $64 DATA vpermi2qodd<>+0(SB)/8, $1 DATA vpermi2qodd<>+8(SB)/8, $3 DATA vpermi2qodd<>+16(SB)/8, $5 DATA vpermi2qodd<>+24(SB)/8, $7 DATA vpermi2qodd<>+32(SB)/8, $(1<<3)|1 DATA vpermi2qodd<>+40(SB)/8, $(1<<3)|3 DATA vpermi2qodd<>+48(SB)/8, $(1<<3)|5 DATA vpermi2qodd<>+56(SB)/8, $(1<<3)|7 GLOBL vpermi2qodd<>(SB), RODATA|NOPTR, $64 #define round(input, acc) \ IMULQ prime2, input \ ADDQ input, acc \ ROLQ $31, acc \ IMULQ prime1, acc #define avalanche(tmp, acc) \ MOVQ acc, tmp \ SHRQ $33, tmp \ XORQ tmp, acc \ IMULQ prime2, acc \ MOVQ acc, tmp \ SHRQ $29, tmp \ XORQ tmp, acc \ IMULQ prime3, acc \ MOVQ acc, tmp \ SHRQ $32, tmp \ XORQ tmp, acc #define round8x64(input, acc) \ VPMULLQ prime2ZMM, input, input \ VPADDQ input, acc, acc \ VPROLQ $31, acc, acc \ VPMULLQ prime1ZMM, acc, acc #define avalanche8x64(tmp, acc) \ VPSRLQ $33, acc, tmp \ VPXORQ tmp, acc, 
acc \ VPMULLQ prime2ZMM, acc, acc \ VPSRLQ $29, acc, tmp \ VPXORQ tmp, acc, acc \ VPMULLQ prime3ZMM, acc, acc \ VPSRLQ $32, acc, tmp \ VPXORQ tmp, acc, acc // func MultiSum64Uint8(h []uint64, v []uint8) int TEXT ·MultiSum64Uint8(SB), NOSPLIT, $0-54 MOVQ $PRIME1, prime1 MOVQ $PRIME2, prime2 MOVQ $PRIME3, prime3 MOVQ $PRIME5, prime5 MOVQ h_base+0(FP), AX MOVQ h_len+8(FP), CX MOVQ v_base+24(FP), BX MOVQ v_len+32(FP), DX CMPQ CX, DX CMOVQGT DX, CX MOVQ CX, ret+48(FP) XORQ SI, SI CMPQ CX, $32 JB loop CMPB ·hasAVX512(SB), $0 JE loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VMOVDQU64 prime1vec<>(SB), prime1ZMM VMOVDQU64 prime2vec<>(SB), prime2ZMM VMOVDQU64 prime3vec<>(SB), prime3ZMM VMOVDQU64 prime5vec<>(SB), prime5ZMM VMOVDQU64 prime5vec1<>(SB), Z6 loop32x64: VMOVDQA64 Z6, Z0 VMOVDQA64 Z6, Z3 VMOVDQA64 Z6, Z20 VMOVDQA64 Z6, Z23 VPMOVZXBQ (BX)(SI*1), Z1 VPMOVZXBQ 8(BX)(SI*1), Z4 VPMOVZXBQ 16(BX)(SI*1), Z21 VPMOVZXBQ 24(BX)(SI*1), Z24 VPMULLQ prime5ZMM, Z1, Z1 VPMULLQ prime5ZMM, Z4, Z4 VPMULLQ prime5ZMM, Z21, Z21 VPMULLQ prime5ZMM, Z24, Z24 VPXORQ Z1, Z0, Z0 VPXORQ Z4, Z3, Z3 VPXORQ Z21, Z20, Z20 VPXORQ Z24, Z23, Z23 VPROLQ $11, Z0, Z0 VPROLQ $11, Z3, Z3 VPROLQ $11, Z20, Z20 VPROLQ $11, Z23, Z23 VPMULLQ prime1ZMM, Z0, Z0 VPMULLQ prime1ZMM, Z3, Z3 VPMULLQ prime1ZMM, Z20, Z20 VPMULLQ prime1ZMM, Z23, Z23 avalanche8x64(Z1, Z0) avalanche8x64(Z4, Z3) avalanche8x64(Z21, Z20) avalanche8x64(Z24, Z23) VMOVDQU64 Z0, (AX)(SI*8) VMOVDQU64 Z3, 64(AX)(SI*8) VMOVDQU64 Z20, 128(AX)(SI*8) VMOVDQU64 Z23, 192(AX)(SI*8) ADDQ $32, SI CMPQ SI, DI JB loop32x64 VZEROUPPER loop: CMPQ SI, CX JE done MOVQ $PRIME5+1, R8 MOVBQZX (BX)(SI*1), R9 IMULQ prime5, R9 XORQ R9, R8 ROLQ $11, R8 IMULQ prime1, R8 avalanche(R9, R8) MOVQ R8, (AX)(SI*8) INCQ SI JMP loop done: RET // func MultiSum64Uint16(h []uint64, v []uint16) int TEXT ·MultiSum64Uint16(SB), NOSPLIT, $0-54 MOVQ $PRIME1, prime1 MOVQ $PRIME2, prime2 MOVQ $PRIME3, prime3 MOVQ $PRIME5, prime5 MOVQ h_base+0(FP), AX MOVQ h_len+8(FP), CX MOVQ v_base+24(FP), BX 
MOVQ v_len+32(FP), DX CMPQ CX, DX CMOVQGT DX, CX MOVQ CX, ret+48(FP) XORQ SI, SI CMPQ CX, $16 JB loop CMPB ·hasAVX512(SB), $0 JE loop MOVQ CX, DI SHRQ $4, DI SHLQ $4, DI VMOVDQU64 prime1vec<>(SB), prime1ZMM VMOVDQU64 prime2vec<>(SB), prime2ZMM VMOVDQU64 prime3vec<>(SB), prime3ZMM VMOVDQU64 prime5vec<>(SB), prime5ZMM VMOVDQU64 prime5vec2<>(SB), Z6 VMOVDQU64 lowbytemask<>(SB), Z7 loop16x64: VMOVDQA64 Z6, Z0 VMOVDQA64 Z6, Z3 VPMOVZXWQ (BX)(SI*2), Z1 VPMOVZXWQ 16(BX)(SI*2), Z4 VMOVDQA64 Z1, Z8 VMOVDQA64 Z4, Z9 VPSRLQ $8, Z8, Z8 VPSRLQ $8, Z9, Z9 VPANDQ Z7, Z1, Z1 VPANDQ Z7, Z4, Z4 VPMULLQ prime5ZMM, Z1, Z1 VPMULLQ prime5ZMM, Z4, Z4 VPXORQ Z1, Z0, Z0 VPXORQ Z4, Z3, Z3 VPROLQ $11, Z0, Z0 VPROLQ $11, Z3, Z3 VPMULLQ prime1ZMM, Z0, Z0 VPMULLQ prime1ZMM, Z3, Z3 VPMULLQ prime5ZMM, Z8, Z8 VPMULLQ prime5ZMM, Z9, Z9 VPXORQ Z8, Z0, Z0 VPXORQ Z9, Z3, Z3 VPROLQ $11, Z0, Z0 VPROLQ $11, Z3, Z3 VPMULLQ prime1ZMM, Z0, Z0 VPMULLQ prime1ZMM, Z3, Z3 avalanche8x64(Z1, Z0) avalanche8x64(Z4, Z3) VMOVDQU64 Z0, (AX)(SI*8) VMOVDQU64 Z3, 64(AX)(SI*8) ADDQ $16, SI CMPQ SI, DI JB loop16x64 VZEROUPPER loop: CMPQ SI, CX JE done MOVQ $PRIME5+2, R8 MOVWQZX (BX)(SI*2), R9 MOVQ R9, R10 SHRQ $8, R10 ANDQ $0xFF, R9 IMULQ prime5, R9 XORQ R9, R8 ROLQ $11, R8 IMULQ prime1, R8 IMULQ prime5, R10 XORQ R10, R8 ROLQ $11, R8 IMULQ prime1, R8 avalanche(R9, R8) MOVQ R8, (AX)(SI*8) INCQ SI JMP loop done: RET // func MultiSum64Uint32(h []uint64, v []uint32) int TEXT ·MultiSum64Uint32(SB), NOSPLIT, $0-54 MOVQ $PRIME1, prime1 MOVQ $PRIME2, prime2 MOVQ $PRIME3, prime3 MOVQ h_base+0(FP), AX MOVQ h_len+8(FP), CX MOVQ v_base+24(FP), BX MOVQ v_len+32(FP), DX CMPQ CX, DX CMOVQGT DX, CX MOVQ CX, ret+48(FP) XORQ SI, SI CMPQ CX, $32 JB loop CMPB ·hasAVX512(SB), $0 JE loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VMOVDQU64 prime1vec<>(SB), prime1ZMM VMOVDQU64 prime2vec<>(SB), prime2ZMM VMOVDQU64 prime3vec<>(SB), prime3ZMM VMOVDQU64 prime5vec4<>(SB), Z6 loop32x64: VMOVDQA64 Z6, Z0 VMOVDQA64 Z6, Z3 VMOVDQA64 Z6, Z20 VMOVDQA64 Z6, Z23 
VPMOVZXDQ (BX)(SI*4), Z1 VPMOVZXDQ 32(BX)(SI*4), Z4 VPMOVZXDQ 64(BX)(SI*4), Z21 VPMOVZXDQ 96(BX)(SI*4), Z24 VPMULLQ prime1ZMM, Z1, Z1 VPMULLQ prime1ZMM, Z4, Z4 VPMULLQ prime1ZMM, Z21, Z21 VPMULLQ prime1ZMM, Z24, Z24 VPXORQ Z1, Z0, Z0 VPXORQ Z4, Z3, Z3 VPXORQ Z21, Z20, Z20 VPXORQ Z24, Z23, Z23 VPROLQ $23, Z0, Z0 VPROLQ $23, Z3, Z3 VPROLQ $23, Z20, Z20 VPROLQ $23, Z23, Z23 VPMULLQ prime2ZMM, Z0, Z0 VPMULLQ prime2ZMM, Z3, Z3 VPMULLQ prime2ZMM, Z20, Z20 VPMULLQ prime2ZMM, Z23, Z23 VPADDQ prime3ZMM, Z0, Z0 VPADDQ prime3ZMM, Z3, Z3 VPADDQ prime3ZMM, Z20, Z20 VPADDQ prime3ZMM, Z23, Z23 avalanche8x64(Z1, Z0) avalanche8x64(Z4, Z3) avalanche8x64(Z21, Z20) avalanche8x64(Z24, Z23) VMOVDQU64 Z0, (AX)(SI*8) VMOVDQU64 Z3, 64(AX)(SI*8) VMOVDQU64 Z20, 128(AX)(SI*8) VMOVDQU64 Z23, 192(AX)(SI*8) ADDQ $32, SI CMPQ SI, DI JB loop32x64 VZEROUPPER loop: CMPQ SI, CX JE done MOVQ $PRIME5+4, R8 MOVLQZX (BX)(SI*4), R9 IMULQ prime1, R9 XORQ R9, R8 ROLQ $23, R8 IMULQ prime2, R8 ADDQ prime3, R8 avalanche(R9, R8) MOVQ R8, (AX)(SI*8) INCQ SI JMP loop done: RET // func MultiSum64Uint64(h []uint64, v []uint64) int TEXT ·MultiSum64Uint64(SB), NOSPLIT, $0-54 MOVQ $PRIME1, prime1 MOVQ $PRIME2, prime2 MOVQ $PRIME3, prime3 MOVQ $PRIME4, prime4 MOVQ h_base+0(FP), AX MOVQ h_len+8(FP), CX MOVQ v_base+24(FP), BX MOVQ v_len+32(FP), DX CMPQ CX, DX CMOVQGT DX, CX MOVQ CX, ret+48(FP) XORQ SI, SI CMPQ CX, $32 JB loop CMPB ·hasAVX512(SB), $0 JE loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VMOVDQU64 prime1vec<>(SB), prime1ZMM VMOVDQU64 prime2vec<>(SB), prime2ZMM VMOVDQU64 prime3vec<>(SB), prime3ZMM VMOVDQU64 prime4vec<>(SB), prime4ZMM VMOVDQU64 prime5vec8<>(SB), Z6 loop32x64: VMOVDQA64 Z6, Z0 VMOVDQA64 Z6, Z3 VMOVDQA64 Z6, Z20 VMOVDQA64 Z6, Z23 VMOVDQU64 (BX)(SI*8), Z1 VMOVDQU64 64(BX)(SI*8), Z4 VMOVDQU64 128(BX)(SI*8), Z21 VMOVDQU64 192(BX)(SI*8), Z24 VPXORQ Z2, Z2, Z2 VPXORQ Z5, Z5, Z5 VPXORQ Z22, Z22, Z22 VPXORQ Z25, Z25, Z25 round8x64(Z1, Z2) round8x64(Z4, Z5) round8x64(Z21, Z22) round8x64(Z24, Z25) VPXORQ Z2, Z0, 
Z0 VPXORQ Z5, Z3, Z3 VPXORQ Z22, Z20, Z20 VPXORQ Z25, Z23, Z23 VPROLQ $27, Z0, Z0 VPROLQ $27, Z3, Z3 VPROLQ $27, Z20, Z20 VPROLQ $27, Z23, Z23 VPMULLQ prime1ZMM, Z0, Z0 VPMULLQ prime1ZMM, Z3, Z3 VPMULLQ prime1ZMM, Z20, Z20 VPMULLQ prime1ZMM, Z23, Z23 VPADDQ prime4ZMM, Z0, Z0 VPADDQ prime4ZMM, Z3, Z3 VPADDQ prime4ZMM, Z20, Z20 VPADDQ prime4ZMM, Z23, Z23 avalanche8x64(Z1, Z0) avalanche8x64(Z4, Z3) avalanche8x64(Z21, Z20) avalanche8x64(Z24, Z23) VMOVDQU64 Z0, (AX)(SI*8) VMOVDQU64 Z3, 64(AX)(SI*8) VMOVDQU64 Z20, 128(AX)(SI*8) VMOVDQU64 Z23, 192(AX)(SI*8) ADDQ $32, SI CMPQ SI, DI JB loop32x64 VZEROUPPER loop: CMPQ SI, CX JE done MOVQ $PRIME5+8, R8 MOVQ (BX)(SI*8), R9 XORQ R10, R10 round(R9, R10) XORQ R10, R8 ROLQ $27, R8 IMULQ prime1, R8 ADDQ prime4, R8 avalanche(R9, R8) MOVQ R8, (AX)(SI*8) INCQ SI JMP loop done: RET // func MultiSum64Uint128(h []uint64, v [][16]byte) int TEXT ·MultiSum64Uint128(SB), NOSPLIT, $0-54 MOVQ $PRIME1, prime1 MOVQ $PRIME2, prime2 MOVQ $PRIME3, prime3 MOVQ $PRIME4, prime4 MOVQ h_base+0(FP), AX MOVQ h_len+8(FP), CX MOVQ v_base+24(FP), BX MOVQ v_len+32(FP), DX CMPQ CX, DX CMOVQGT DX, CX MOVQ CX, ret+48(FP) XORQ SI, SI CMPQ CX, $16 JB loop CMPB ·hasAVX512(SB), $0 JE loop MOVQ CX, DI SHRQ $4, DI SHLQ $4, DI VMOVDQU64 prime1vec<>(SB), prime1ZMM VMOVDQU64 prime2vec<>(SB), prime2ZMM VMOVDQU64 prime3vec<>(SB), prime3ZMM VMOVDQU64 prime4vec<>(SB), prime4ZMM VMOVDQU64 prime5vec16<>(SB), Z6 VMOVDQU64 vpermi2qeven<>(SB), Z7 VMOVDQU64 vpermi2qodd<>(SB), Z8 loop16x64: // This algorithm is slightly different from the other ones, because it is // the only case where the input values are larger than the output (128 bits // vs 64 bits). // // Computing the XXH64 of 128 bits values requires doing two passes over the // lower and upper 64 bits. 
The lower and upper quad/ words are split in // separate vectors, the first pass is applied on the vector holding the // lower bits of 8 input values, then the second pass is applied with the // vector holding the upper bits. // // Following the model used in the other functions, we unroll the work of // two consecutive groups of 8 values per loop iteration in order to // maximize utilization of CPU resources. CMPQ SI, DI JE loop VMOVDQA64 Z6, Z0 VMOVDQA64 Z6, Z20 VMOVDQU64 (BX), Z1 VMOVDQU64 64(BX), Z9 VMOVDQU64 128(BX), Z21 VMOVDQU64 192(BX), Z29 VMOVDQA64 Z7, Z2 VMOVDQA64 Z8, Z3 VMOVDQA64 Z7, Z22 VMOVDQA64 Z8, Z23 VPERMI2Q Z9, Z1, Z2 VPERMI2Q Z9, Z1, Z3 VPERMI2Q Z29, Z21, Z22 VPERMI2Q Z29, Z21, Z23 // Compute the rounds on inputs. VPXORQ Z4, Z4, Z4 VPXORQ Z5, Z5, Z5 VPXORQ Z24, Z24, Z24 VPXORQ Z25, Z25, Z25 round8x64(Z2, Z4) round8x64(Z3, Z5) round8x64(Z22, Z24) round8x64(Z23, Z25) // Lower 64 bits. VPXORQ Z4, Z0, Z0 VPXORQ Z24, Z20, Z20 VPROLQ $27, Z0, Z0 VPROLQ $27, Z20, Z20 VPMULLQ prime1ZMM, Z0, Z0 VPMULLQ prime1ZMM, Z20, Z20 VPADDQ prime4ZMM, Z0, Z0 VPADDQ prime4ZMM, Z20, Z20 // Upper 64 bits. 
VPXORQ Z5, Z0, Z0 VPXORQ Z25, Z20, Z20 VPROLQ $27, Z0, Z0 VPROLQ $27, Z20, Z20 VPMULLQ prime1ZMM, Z0, Z0 VPMULLQ prime1ZMM, Z20, Z20 VPADDQ prime4ZMM, Z0, Z0 VPADDQ prime4ZMM, Z20, Z20 avalanche8x64(Z1, Z0) avalanche8x64(Z21, Z20) VMOVDQU64 Z0, (AX)(SI*8) VMOVDQU64 Z20, 64(AX)(SI*8) ADDQ $256, BX ADDQ $16, SI JMP loop16x64 VZEROUPPER loop: CMPQ SI, CX JE done MOVQ $PRIME5+16, R8 MOVQ (BX), DX MOVQ 8(BX), DI XORQ R9, R9 XORQ R10, R10 round(DX, R9) round(DI, R10) XORQ R9, R8 ROLQ $27, R8 IMULQ prime1, R8 ADDQ prime4, R8 XORQ R10, R8 ROLQ $27, R8 IMULQ prime1, R8 ADDQ prime4, R8 avalanche(R9, R8) MOVQ R8, (AX)(SI*8) ADDQ $16, BX INCQ SI JMP loop done: RET ================================================ FILE: bloom/xxhash/sum64uint_purego.go ================================================ //go:build purego || !amd64 package xxhash func MultiSum64Uint8(h []uint64, v []uint8) int { n := min(len(h), len(v)) h = h[:n] v = v[:n] for i := range v { h[i] = Sum64Uint8(v[i]) } return n } func MultiSum64Uint16(h []uint64, v []uint16) int { n := min(len(h), len(v)) h = h[:n] v = v[:n] for i := range v { h[i] = Sum64Uint16(v[i]) } return n } func MultiSum64Uint32(h []uint64, v []uint32) int { n := min(len(h), len(v)) h = h[:n] v = v[:n] for i := range v { h[i] = Sum64Uint32(v[i]) } return n } func MultiSum64Uint64(h []uint64, v []uint64) int { n := min(len(h), len(v)) h = h[:n] v = v[:n] for i := range v { h[i] = Sum64Uint64(v[i]) } return n } func MultiSum64Uint128(h []uint64, v [][16]byte) int { n := min(len(h), len(v)) h = h[:n] v = v[:n] for i := range v { h[i] = Sum64Uint128(v[i]) } return n } func min(a, b int) int { if a < b { return a } return b } ================================================ FILE: bloom/xxhash/sum64uint_test.go ================================================ package xxhash_test import ( "encoding/binary" "fmt" "testing" "testing/quick" "time" "github.com/segmentio/parquet-go/bloom/xxhash" ) func TestSumUint8(t *testing.T) { b := [1]byte{0: 42} h := 
xxhash.Sum64Uint8(42) x := xxhash.Sum64(b[:]) if h != x { t.Errorf("got %064b; want %064b", h, x) } } func TestSumUint16(t *testing.T) { b := [2]byte{0: 42} h := xxhash.Sum64Uint16(42) x := xxhash.Sum64(b[:]) if h != x { t.Errorf("got %064b; want %064b", h, x) } } func TestSumUint32(t *testing.T) { b := [4]byte{0: 42} h := xxhash.Sum64Uint32(42) x := xxhash.Sum64(b[:]) if h != x { t.Errorf("got %064b; want %064b", h, x) } } func TestSumUint64(t *testing.T) { b := [8]byte{0: 42} h := xxhash.Sum64Uint64(42) x := xxhash.Sum64(b[:]) if h != x { t.Errorf("got %064b; want %064b", h, x) } } func TestSumUint128(t *testing.T) { b := [16]byte{0: 42} h := xxhash.Sum64Uint128(b) x := xxhash.Sum64(b[:]) if h != x { t.Errorf("got %064b; want %064b", h, x) } } func TestMultiSum64Uint8(t *testing.T) { f := func(v []uint8) bool { h := make([]uint64, len(v)) n := xxhash.MultiSum64Uint8(h, v) if n != len(v) { t.Errorf("return value mismatch: got %d; want %d", n, len(v)) return false } for i := range h { x := xxhash.Sum64(v[i : i+1]) if h[i] != x { t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, h[i], x) return false } } return true } if err := quick.Check(f, nil); err != nil { t.Error(err) } } func TestMultiSum64Uint16(t *testing.T) { f := func(v []uint16) bool { h := make([]uint64, len(v)) n := xxhash.MultiSum64Uint16(h, v) if n != len(v) { t.Errorf("return value mismatch: got %d; want %d", n, len(v)) return false } for i := range h { b := [2]byte{} binary.LittleEndian.PutUint16(b[:], v[i]) x := xxhash.Sum64(b[:]) if h[i] != x { t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, h[i], x) return false } } return true } if err := quick.Check(f, nil); err != nil { t.Error(err) } } func TestMultiSum64Uint32(t *testing.T) { f := func(v []uint32) bool { h := make([]uint64, len(v)) n := xxhash.MultiSum64Uint32(h, v) if n != len(v) { t.Errorf("return value mismatch: got %d; want %d", n, len(v)) return false } for i := range h { b := [4]byte{} 
binary.LittleEndian.PutUint32(b[:], v[i]) x := xxhash.Sum64(b[:]) if h[i] != x { t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, h[i], x) return false } } return true } if err := quick.Check(f, nil); err != nil { t.Error(err) } } func TestMultiSum64Uint64(t *testing.T) { f := func(v []uint64) bool { h := make([]uint64, len(v)) n := xxhash.MultiSum64Uint64(h, v) if n != len(v) { t.Errorf("return value mismatch: got %d; want %d", n, len(v)) return false } for i := range h { b := [8]byte{} binary.LittleEndian.PutUint64(b[:], v[i]) x := xxhash.Sum64(b[:]) if h[i] != x { t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, h[i], x) return false } } return true } if err := quick.Check(f, nil); err != nil { t.Error(err) } } func TestMultiSum64Uint128(t *testing.T) { f := func(v [][16]byte) bool { h := make([]uint64, len(v)) n := xxhash.MultiSum64Uint128(h, v) if n != len(v) { t.Errorf("return value mismatch: got %d; want %d", n, len(v)) return false } for i := range h { x := xxhash.Sum64(v[i][:]) if h[i] != x { t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, h[i], x) return false } } return true } if err := quick.Check(f, nil); err != nil { t.Error(err) } } func reportThroughput(b *testing.B, loops, count int, start time.Time) { throughput := float64(loops*count) / time.Since(start).Seconds() // Measure the throughput of writes to the output buffer; // it makes the results comparable across benchmarks that // have inputs of different sizes. 
b.SetBytes(8 * int64(count)) b.ReportMetric(0, "ns/op") b.ReportMetric(throughput, "hash/s") } const benchmarkBufferSize = 4096 func BenchmarkMultiSum64Uint8(b *testing.B) { in := make([]uint8, benchmarkBufferSize) for i := range in { in[i] = uint8(i) } b.Run(fmt.Sprintf("%dKB", benchmarkBufferSize/1024), func(b *testing.B) { out := make([]uint64, len(in)) start := time.Now() for i := 0; i < b.N; i++ { _ = xxhash.MultiSum64Uint8(out, in) } reportThroughput(b, b.N, len(out), start) }) } func BenchmarkMultiSum64Uint16(b *testing.B) { in := make([]uint16, benchmarkBufferSize/2) for i := range in { in[i] = uint16(i) } b.Run(fmt.Sprintf("%dKB", benchmarkBufferSize/1024), func(b *testing.B) { out := make([]uint64, len(in)) start := time.Now() for i := 0; i < b.N; i++ { _ = xxhash.MultiSum64Uint16(out, in) } reportThroughput(b, b.N, len(out), start) }) } func BenchmarkMultiSum64Uint32(b *testing.B) { in := make([]uint32, benchmarkBufferSize/4) for i := range in { in[i] = uint32(i) } b.Run(fmt.Sprintf("%dKB", benchmarkBufferSize/1024), func(b *testing.B) { out := make([]uint64, len(in)) start := time.Now() for i := 0; i < b.N; i++ { _ = xxhash.MultiSum64Uint32(out, in) } reportThroughput(b, b.N, len(out), start) }) } func BenchmarkMultiSum64Uint64(b *testing.B) { in := make([]uint64, benchmarkBufferSize/8) for i := range in { in[i] = uint64(i) } b.Run(fmt.Sprintf("%dKB", benchmarkBufferSize/1024), func(b *testing.B) { out := make([]uint64, len(in)) start := time.Now() for i := 0; i < b.N; i++ { _ = xxhash.MultiSum64Uint64(out, in) } reportThroughput(b, b.N, len(out), start) }) } func BenchmarkMultiSum64Uint128(b *testing.B) { in := make([][16]byte, benchmarkBufferSize/16) for i := range in { binary.LittleEndian.PutUint64(in[i][:8], uint64(i)) binary.LittleEndian.PutUint64(in[i][8:], uint64(i)) } b.Run(fmt.Sprintf("%dKB", benchmarkBufferSize/1024), func(b *testing.B) { out := make([]uint64, len(in)) start := time.Now() for i := 0; i < b.N; i++ { _ = 
xxhash.MultiSum64Uint128(out, in) } reportThroughput(b, b.N, len(out), start) }) } ================================================ FILE: bloom/xxhash/xxhash.go ================================================ // Package xxhash is an extension of github.com/cespare/xxhash which adds // routines optimized to hash arrays of fixed size elements. package xxhash import ( "encoding/binary" "math/bits" ) const ( prime1 uint64 = 0x9E3779B185EBCA87 prime2 uint64 = 0xC2B2AE3D27D4EB4F prime3 uint64 = 0x165667B19E3779F9 prime4 uint64 = 0x85EBCA77C2B2AE63 prime5 uint64 = 0x27D4EB2F165667C5 // Pre-computed operations because the compiler otherwise complains that the // results overflow 64 bit integers. prime1plus2 uint64 = 0x60EA27EEADC0B5D6 // prime1 + prime2 negprime1 uint64 = 0x61C8864E7A143579 // -prime1 ) func avalanche(h uint64) uint64 { h ^= h >> 33 h *= prime2 h ^= h >> 29 h *= prime3 h ^= h >> 32 return h } func round(acc, input uint64) uint64 { acc += input * prime2 acc = rol31(acc) acc *= prime1 return acc } func mergeRound(acc, val uint64) uint64 { val = round(0, val) acc ^= val acc = acc*prime1 + prime4 return acc } func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } ================================================ FILE: bloom/xxhash/xxhash_amd64.go ================================================ //go:build !purego package xxhash // Sum64 computes the 64-bit xxHash digest of b. 
func Sum64(b []byte) uint64 ================================================ FILE: bloom/xxhash/xxhash_amd64.s ================================================ //go:build !purego #include "textflag.h" #define PRIME1 0x9E3779B185EBCA87 #define PRIME2 0xC2B2AE3D27D4EB4F #define PRIME3 0x165667B19E3779F9 #define PRIME4 0x85EBCA77C2B2AE63 #define PRIME5 0x27D4EB2F165667C5 DATA prime3<>+0(SB)/8, $PRIME3 GLOBL prime3<>(SB), RODATA|NOPTR, $8 DATA prime5<>+0(SB)/8, $PRIME5 GLOBL prime5<>(SB), RODATA|NOPTR, $8 // Register allocation: // AX h // SI pointer to advance through b // DX n // BX loop end // R8 v1, k1 // R9 v2 // R10 v3 // R11 v4 // R12 tmp // R13 PRIME1 // R14 PRIME2 // DI PRIME4 // round reads from and advances the buffer pointer in SI. // It assumes that R13 has PRIME1 and R14 has PRIME2. #define round(r) \ MOVQ (SI), R12 \ ADDQ $8, SI \ IMULQ R14, R12 \ ADDQ R12, r \ ROLQ $31, r \ IMULQ R13, r // mergeRound applies a merge round on the two registers acc and val. // It assumes that R13 has PRIME1, R14 has PRIME2, and DI has PRIME4. #define mergeRound(acc, val) \ IMULQ R14, val \ ROLQ $31, val \ IMULQ R13, val \ XORQ val, acc \ IMULQ R13, acc \ ADDQ DI, acc // func Sum64(b []byte) uint64 TEXT ·Sum64(SB), NOSPLIT, $0-32 // Load fixed primes. MOVQ $PRIME1, R13 MOVQ $PRIME2, R14 MOVQ $PRIME4, DI // Load slice. MOVQ b_base+0(FP), SI MOVQ b_len+8(FP), DX LEAQ (SI)(DX*1), BX // The first loop limit will be len(b)-32. SUBQ $32, BX // Check whether we have at least one block. CMPQ DX, $32 JLT noBlocks // Set up initial state (v1, v2, v3, v4). MOVQ R13, R8 ADDQ R14, R8 MOVQ R14, R9 XORQ R10, R10 XORQ R11, R11 SUBQ R13, R11 // Loop until SI > BX. 
blockLoop: round(R8) round(R9) round(R10) round(R11) CMPQ SI, BX JLE blockLoop MOVQ R8, AX ROLQ $1, AX MOVQ R9, R12 ROLQ $7, R12 ADDQ R12, AX MOVQ R10, R12 ROLQ $12, R12 ADDQ R12, AX MOVQ R11, R12 ROLQ $18, R12 ADDQ R12, AX mergeRound(AX, R8) mergeRound(AX, R9) mergeRound(AX, R10) mergeRound(AX, R11) JMP afterBlocks noBlocks: MOVQ $PRIME5, AX afterBlocks: ADDQ DX, AX // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. ADDQ $24, BX CMPQ SI, BX JG fourByte wordLoop: // Calculate k1. MOVQ (SI), R8 ADDQ $8, SI IMULQ R14, R8 ROLQ $31, R8 IMULQ R13, R8 XORQ R8, AX ROLQ $27, AX IMULQ R13, AX ADDQ DI, AX CMPQ SI, BX JLE wordLoop fourByte: ADDQ $4, BX CMPQ SI, BX JG singles MOVL (SI), R8 ADDQ $4, SI IMULQ R13, R8 XORQ R8, AX ROLQ $23, AX IMULQ R14, AX ADDQ prime3<>(SB), AX singles: ADDQ $4, BX CMPQ SI, BX JGE finalize singlesLoop: MOVBQZX (SI), R12 ADDQ $1, SI IMULQ prime5<>(SB), R12 XORQ R12, AX ROLQ $11, AX IMULQ R13, AX CMPQ SI, BX JL singlesLoop finalize: MOVQ AX, R12 SHRQ $33, R12 XORQ R12, AX IMULQ R14, AX MOVQ AX, R12 SHRQ $29, R12 XORQ R12, AX IMULQ prime3<>(SB), AX MOVQ AX, R12 SHRQ $32, R12 XORQ R12, AX MOVQ AX, ret+24(FP) RET ================================================ FILE: bloom/xxhash/xxhash_purego.go ================================================ //go:build purego || !amd64 package xxhash // Sum64 computes the 64-bit xxHash digest of b. 
func Sum64(b []byte) uint64 {
	var n = len(b) // remember the original length; b is consumed below
	var h uint64

	if n >= 32 {
		// Inputs of 32 bytes or more are consumed 32 bytes at a time by four
		// parallel accumulators, then merged into a single hash state.
		// v1..v4 start from the xxHash-specified seeds (seed 0 here).
		v1 := prime1plus2
		v2 := prime2
		v3 := uint64(0)
		v4 := negprime1
		for len(b) >= 32 {
			// Each accumulator absorbs one of the four 8-byte lanes. The
			// three-index slices keep the compiler from retaining extra
			// capacity and help bounds-check elimination.
			v1 = round(v1, u64(b[0:8:len(b)]))
			v2 = round(v2, u64(b[8:16:len(b)]))
			v3 = round(v3, u64(b[16:24:len(b)]))
			v4 = round(v4, u64(b[24:32:len(b)]))
			b = b[32:len(b):len(b)]
		}
		// Fold the four accumulators into h with rotations and merge rounds.
		h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
		h = mergeRound(h, v1)
		h = mergeRound(h, v2)
		h = mergeRound(h, v3)
		h = mergeRound(h, v4)
	} else {
		// Short inputs skip the block loop and start from prime5.
		h = prime5
	}

	// The total input length is always mixed into the hash.
	h += uint64(n)

	// Consume the remaining tail (fewer than 32 bytes at this point):
	// first 8-byte words, then one optional 4-byte word, then single bytes.
	i, end := 0, len(b)
	for ; i+8 <= end; i += 8 {
		k1 := round(0, u64(b[i:i+8:len(b)]))
		h ^= k1
		h = rol27(h)*prime1 + prime4
	}
	if i+4 <= end {
		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
		h = rol23(h)*prime2 + prime3
		i += 4
	}
	for ; i < end; i++ {
		h ^= uint64(b[i]) * prime5
		h = rol11(h) * prime1
	}

	// Final avalanche scrambles the bits so every input bit affects the output.
	return avalanche(h)
}
================================================ FILE: bloom/xxhash/xxhash_test.go ================================================ package xxhash_test import ( "testing" "github.com/segmentio/parquet-go/bloom/xxhash" ) func TestSum64(t *testing.T) { for _, tt := range []struct { name string input string want uint64 }{ {"empty", "", 0xef46db3751d8e999}, {"a", "a", 0xd24ec4f1a98c6e5b}, {"as", "as", 0x1c330fb2d66be179}, {"asd", "asd", 0x631c37ce72a97393}, {"asdf", "asdf", 0x415872f599cea71e}, { "len=63", // Exactly 63 characters, which exercises all code paths. "Call me Ishmael. 
Some years ago--never mind how long precisely-", 0x02a2e85470d6fd96, }, } { t.Run(tt.name, func(t *testing.T) { if got := xxhash.Sum64([]byte(tt.input)); got != tt.want { t.Fatalf("Sum64: got 0x%x; want 0x%x", got, tt.want) } }) } } var benchmarks = []struct { name string n int64 }{ {"4B", 4}, {"16B", 16}, {"100B", 100}, {"4KB", 4e3}, {"10MB", 10e6}, } func BenchmarkSum64(b *testing.B) { for _, bb := range benchmarks { in := make([]byte, bb.n) for i := range in { in[i] = byte(i) } b.Run(bb.name, func(b *testing.B) { b.SetBytes(bb.n) for i := 0; i < b.N; i++ { _ = xxhash.Sum64(in) } }) } } ================================================ FILE: bloom.go ================================================ package parquet import ( "io" "github.com/segmentio/parquet-go/bloom" "github.com/segmentio/parquet-go/bloom/xxhash" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" "github.com/segmentio/parquet-go/internal/unsafecast" ) // BloomFilter is an interface allowing applications to test whether a key // exists in a bloom filter. type BloomFilter interface { // Implement the io.ReaderAt interface as a mechanism to allow reading the // raw bits of the filter. io.ReaderAt // Returns the size of the bloom filter (in bytes). Size() int64 // Tests whether the given value is present in the filter. // // A non-nil error may be returned if reading the filter failed. This may // happen if the filter was lazily loaded from a storage medium during the // call to Check for example. Applications that can guarantee that the // filter was in memory at the time Check was called can safely ignore the // error, which would always be nil in this case. 
Check(value Value) (bool, error) } type bloomFilter struct { io.SectionReader hash bloom.Hash check func(io.ReaderAt, int64, uint64) (bool, error) } func (f *bloomFilter) Check(v Value) (bool, error) { return f.check(&f.SectionReader, f.Size(), v.hash(f.hash)) } func (v Value) hash(h bloom.Hash) uint64 { switch v.Kind() { case Boolean: return h.Sum64Uint8(v.byte()) case Int32, Float: return h.Sum64Uint32(v.uint32()) case Int64, Double: return h.Sum64Uint64(v.uint64()) default: // Int96, ByteArray, FixedLenByteArray, or null return h.Sum64(v.byteArray()) } } func newBloomFilter(file io.ReaderAt, offset int64, header *format.BloomFilterHeader) *bloomFilter { if header.Algorithm.Block != nil { if header.Hash.XxHash != nil { if header.Compression.Uncompressed != nil { return &bloomFilter{ SectionReader: *io.NewSectionReader(file, offset, int64(header.NumBytes)), hash: bloom.XXH64{}, check: bloom.CheckSplitBlock, } } } } return nil } // The BloomFilterColumn interface is a declarative representation of bloom filters // used when configuring filters on a parquet writer. type BloomFilterColumn interface { // Returns the path of the column that the filter applies to. Path() []string // Returns the hashing algorithm used when inserting values into a bloom // filter. Hash() bloom.Hash // Returns an encoding which can be used to write columns of values to the // filter. Encoding() encoding.Encoding // Returns the size of the filter needed to encode values in the filter, // assuming each value will be encoded with the given number of bits. Size(numValues int64) int } // SplitBlockFilter constructs a split block bloom filter object for the column // at the given path, with the given bitsPerValue. // // If you are unsure what number of bitsPerValue to use, 10 is a reasonable // tradeoff between size and error rate for common datasets. 
// // For more information on the tradeoff between size and error rate, consult // this website: https://hur.st/bloomfilter/?n=4000&p=0.1&m=&k=1 func SplitBlockFilter(bitsPerValue uint, path ...string) BloomFilterColumn { return splitBlockFilter{ bitsPerValue: bitsPerValue, path: path, } } type splitBlockFilter struct { bitsPerValue uint path []string } func (f splitBlockFilter) Path() []string { return f.path } func (f splitBlockFilter) Hash() bloom.Hash { return bloom.XXH64{} } func (f splitBlockFilter) Encoding() encoding.Encoding { return splitBlockEncoding{} } func (f splitBlockFilter) Size(numValues int64) int { return bloom.BlockSize * bloom.NumSplitBlocksOf(numValues, f.bitsPerValue) } // Creates a header from the given bloom filter. // // For now there is only one type of filter supported, but we provide this // function to suggest a model for extending the implementation if new filters // are added to the parquet specs. func bloomFilterHeader(filter BloomFilterColumn) (header format.BloomFilterHeader) { switch filter.(type) { case splitBlockFilter: header.Algorithm.Block = &format.SplitBlockAlgorithm{} } switch filter.Hash().(type) { case bloom.XXH64: header.Hash.XxHash = &format.XxHash{} } header.Compression.Uncompressed = &format.BloomFilterUncompressed{} return header } func searchBloomFilterColumn(filters []BloomFilterColumn, path columnPath) BloomFilterColumn { for _, f := range filters { if path.equal(f.Path()) { return f } } return nil } const ( // Size of the stack buffer used to perform bulk operations on bloom filters. // // This value was determined as being a good default empirically, // 128 x uint64 makes a 1KiB buffer which amortizes the cost of calling // methods of bloom filters while not causing too much stack growth either. 
filterEncodeBufferSize = 128 ) type splitBlockEncoding struct { encoding.NotSupported } func (splitBlockEncoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { splitBlockEncodeUint8(bloom.MakeSplitBlockFilter(dst), src) return dst, nil } func (splitBlockEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Int32ToUint32(src)) return dst, nil } func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Int64ToUint64(src)) return dst, nil } func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), deprecated.Int96ToBytes(src), 12) return dst, nil } func (splitBlockEncoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { splitBlockEncodeUint32(bloom.MakeSplitBlockFilter(dst), unsafecast.Float32ToUint32(src)) return dst, nil } func (splitBlockEncoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { splitBlockEncodeUint64(bloom.MakeSplitBlockFilter(dst), unsafecast.Float64ToUint64(src)) return dst, nil } func (splitBlockEncoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { filter := bloom.MakeSplitBlockFilter(dst) buffer := make([]uint64, 0, filterEncodeBufferSize) baseOffset := offsets[0] for _, endOffset := range offsets[1:] { value := src[baseOffset:endOffset:endOffset] baseOffset = endOffset if len(buffer) == cap(buffer) { filter.InsertBulk(buffer) buffer = buffer[:0] } buffer = append(buffer, xxhash.Sum64(value)) } filter.InsertBulk(buffer) return dst, nil } func (splitBlockEncoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { filter := bloom.MakeSplitBlockFilter(dst) if size == 16 { splitBlockEncodeUint128(filter, unsafecast.BytesToUint128(src)) } else { splitBlockEncodeFixedLenByteArray(filter, src, size) 
} return dst, nil } func splitBlockEncodeFixedLenByteArray(filter bloom.SplitBlockFilter, data []byte, size int) { buffer := make([]uint64, 0, filterEncodeBufferSize) for i, j := 0, size; j <= len(data); { if len(buffer) == cap(buffer) { filter.InsertBulk(buffer) buffer = buffer[:0] } buffer = append(buffer, xxhash.Sum64(data[i:j])) i += size j += size } filter.InsertBulk(buffer) } func splitBlockEncodeUint8(filter bloom.SplitBlockFilter, values []uint8) { buffer := make([]uint64, filterEncodeBufferSize) for i := 0; i < len(values); { n := xxhash.MultiSum64Uint8(buffer, values[i:]) filter.InsertBulk(buffer[:n]) i += n } } func splitBlockEncodeUint32(filter bloom.SplitBlockFilter, values []uint32) { buffer := make([]uint64, filterEncodeBufferSize) for i := 0; i < len(values); { n := xxhash.MultiSum64Uint32(buffer, values[i:]) filter.InsertBulk(buffer[:n]) i += n } } func splitBlockEncodeUint64(filter bloom.SplitBlockFilter, values []uint64) { buffer := make([]uint64, filterEncodeBufferSize) for i := 0; i < len(values); { n := xxhash.MultiSum64Uint64(buffer, values[i:]) filter.InsertBulk(buffer[:n]) i += n } } func splitBlockEncodeUint128(filter bloom.SplitBlockFilter, values [][16]byte) { buffer := make([]uint64, filterEncodeBufferSize) for i := 0; i < len(values); { n := xxhash.MultiSum64Uint128(buffer, values[i:]) filter.InsertBulk(buffer[:n]) i += n } } ================================================ FILE: bloom_test.go ================================================ package parquet import ( "math/rand" "testing" "github.com/segmentio/parquet-go/bloom" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/internal/quick" "github.com/segmentio/parquet-go/internal/unsafecast" ) func TestSplitBlockFilter(t *testing.T) { newFilter := func(numValues int) bloom.SplitBlockFilter { return make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(int64(numValues), 11)) } enc := SplitBlockFilter(10, "$").Encoding() check := func(filter 
bloom.SplitBlockFilter, value Value) bool { return filter.Check(value.hash(&bloom.XXH64{})) } tests := []struct { scenario string function interface{} }{ { scenario: "BOOLEAN", function: func(values []bool) bool { filter := newFilter(len(values)) enc.EncodeBoolean(filter.Bytes(), unsafecast.BoolToBytes(values)) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "INT32", function: func(values []int32) bool { filter := newFilter(len(values)) enc.EncodeInt32(filter.Bytes(), values) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "INT64", function: func(values []int64) bool { filter := newFilter(len(values)) enc.EncodeInt64(filter.Bytes(), values) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "INT96", function: func(values []deprecated.Int96) bool { filter := newFilter(len(values)) enc.EncodeInt96(filter.Bytes(), values) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "FLOAT", function: func(values []float32) bool { filter := newFilter(len(values)) enc.EncodeFloat(filter.Bytes(), values) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "DOUBLE", function: func(values []float64) bool { filter := newFilter(len(values)) enc.EncodeDouble(filter.Bytes(), values) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "BYTE_ARRAY", function: func(values [][]byte) bool { content := make([]byte, 0, 512) offsets := make([]uint32, len(values)) for _, value := range values { offsets = append(offsets, uint32(len(content))) content = append(content, value...) 
} offsets = append(offsets, uint32(len(content))) filter := newFilter(len(values)) enc.EncodeByteArray(filter.Bytes(), content, offsets) for _, v := range values { if !check(filter, ValueOf(v)) { return false } } return true }, }, { scenario: "FIXED_LEN_BYTE_ARRAY", function: func(values []byte) bool { filter := newFilter(len(values)) enc.EncodeFixedLenByteArray(filter.Bytes(), values, 1) for _, v := range values { if !check(filter, ValueOf([1]byte{v})) { return false } } return true }, }, } for _, test := range tests { t.Run(test.scenario, func(t *testing.T) { if err := quick.Check(test.function); err != nil { t.Error(err) } }) } } func BenchmarkSplitBlockFilter(b *testing.B) { const N = 1000 f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(N, 10)).Bytes() e := SplitBlockFilter(10, "$").Encoding() v := make([]int64, N) r := rand.NewSource(10) for i := range v { v[i] = r.Int63() } for i := 0; i < b.N; i++ { e.EncodeInt64(f, v) } b.SetBytes(8 * N) } ================================================ FILE: buffer.go ================================================ package parquet import ( "log" "runtime" "sort" "sync" "sync/atomic" "github.com/segmentio/parquet-go/internal/debug" ) // Buffer represents an in-memory group of parquet rows. // // The main purpose of the Buffer type is to provide a way to sort rows before // writing them to a parquet file. Buffer implements sort.Interface as a way // to support reordering the rows that have been written to it. type Buffer struct { config *RowGroupConfig schema *Schema rowbuf []Row colbuf [][]Value chunks []ColumnChunk columns []ColumnBuffer sorted []ColumnBuffer } // NewBuffer constructs a new buffer, using the given list of buffer options // to configure the buffer returned by the function. // // The function panics if the buffer configuration is invalid. 
Programs that // cannot guarantee the validity of the options passed to NewBuffer should // construct the buffer configuration independently prior to calling this // function: // // config, err := parquet.NewRowGroupConfig(options...) // if err != nil { // // handle the configuration error // ... // } else { // // this call to create a buffer is guaranteed not to panic // buffer := parquet.NewBuffer(config) // ... // } func NewBuffer(options ...RowGroupOption) *Buffer { config, err := NewRowGroupConfig(options...) if err != nil { panic(err) } buf := &Buffer{ config: config, } if config.Schema != nil { buf.configure(config.Schema) } return buf } func (buf *Buffer) configure(schema *Schema) { if schema == nil { return } sortingColumns := buf.config.Sorting.SortingColumns buf.sorted = make([]ColumnBuffer, len(sortingColumns)) forEachLeafColumnOf(schema, func(leaf leafColumn) { nullOrdering := nullsGoLast columnIndex := int(leaf.columnIndex) columnType := leaf.node.Type() bufferCap := buf.config.ColumnBufferCapacity dictionary := (Dictionary)(nil) encoding := encodingOf(leaf.node) if isDictionaryEncoding(encoding) { estimatedDictBufferSize := columnType.EstimateSize(bufferCap) dictBuffer := columnType.NewValues( make([]byte, 0, estimatedDictBufferSize), nil, ) dictionary = columnType.NewDictionary(columnIndex, 0, dictBuffer) columnType = dictionary.Type() } sortingIndex := searchSortingColumn(sortingColumns, leaf.path) if sortingIndex < len(sortingColumns) && sortingColumns[sortingIndex].NullsFirst() { nullOrdering = nullsGoFirst } column := columnType.NewColumnBuffer(columnIndex, bufferCap) switch { case leaf.maxRepetitionLevel > 0: column = newRepeatedColumnBuffer(column, leaf.maxRepetitionLevel, leaf.maxDefinitionLevel, nullOrdering) case leaf.maxDefinitionLevel > 0: column = newOptionalColumnBuffer(column, leaf.maxDefinitionLevel, nullOrdering) } buf.columns = append(buf.columns, column) if sortingIndex < len(sortingColumns) { if 
sortingColumns[sortingIndex].Descending() { column = &reversedColumnBuffer{column} } buf.sorted[sortingIndex] = column } }) buf.schema = schema buf.rowbuf = make([]Row, 0, 1) buf.colbuf = make([][]Value, len(buf.columns)) buf.chunks = make([]ColumnChunk, len(buf.columns)) for i, column := range buf.columns { buf.chunks[i] = column } } // Size returns the estimated size of the buffer in memory (in bytes). func (buf *Buffer) Size() int64 { size := int64(0) for _, col := range buf.columns { size += col.Size() } return size } // NumRows returns the number of rows written to the buffer. func (buf *Buffer) NumRows() int64 { return int64(buf.Len()) } // ColumnChunks returns the buffer columns. func (buf *Buffer) ColumnChunks() []ColumnChunk { return buf.chunks } // ColumnBuffer returns the buffer columns. // // This method is similar to ColumnChunks, but returns a list of ColumnBuffer // instead of a ColumnChunk values (the latter being read-only); calling // ColumnBuffers or ColumnChunks with the same index returns the same underlying // objects, but with different types, which removes the need for making a type // assertion if the program needed to write directly to the column buffers. // The presence of the ColumnChunks method is still required to satisfy the // RowGroup interface. func (buf *Buffer) ColumnBuffers() []ColumnBuffer { return buf.columns } // Schema returns the schema of the buffer. // // The schema is either configured by passing a Schema in the option list when // constructing the buffer, or lazily discovered when the first row is written. func (buf *Buffer) Schema() *Schema { return buf.schema } // SortingColumns returns the list of columns by which the buffer will be // sorted. // // The sorting order is configured by passing a SortingColumns option when // constructing the buffer. func (buf *Buffer) SortingColumns() []SortingColumn { return buf.config.Sorting.SortingColumns } // Len returns the number of rows written to the buffer. 
func (buf *Buffer) Len() int { if len(buf.columns) == 0 { return 0 } else { // All columns have the same number of rows. return buf.columns[0].Len() } } // Less returns true if row[i] < row[j] in the buffer. func (buf *Buffer) Less(i, j int) bool { for _, col := range buf.sorted { switch { case col.Less(i, j): return true case col.Less(j, i): return false } } return false } // Swap exchanges the rows at indexes i and j. func (buf *Buffer) Swap(i, j int) { for _, col := range buf.columns { col.Swap(i, j) } } // Reset clears the content of the buffer, allowing it to be reused. func (buf *Buffer) Reset() { for _, col := range buf.columns { col.Reset() } } // Write writes a row held in a Go value to the buffer. func (buf *Buffer) Write(row interface{}) error { if buf.schema == nil { buf.configure(SchemaOf(row)) } buf.rowbuf = buf.rowbuf[:1] defer clearRows(buf.rowbuf) buf.rowbuf[0] = buf.schema.Deconstruct(buf.rowbuf[0], row) _, err := buf.WriteRows(buf.rowbuf) return err } // WriteRows writes parquet rows to the buffer. func (buf *Buffer) WriteRows(rows []Row) (int, error) { defer func() { for i, colbuf := range buf.colbuf { clearValues(colbuf) buf.colbuf[i] = colbuf[:0] } }() if buf.schema == nil { return 0, ErrRowGroupSchemaMissing } for _, row := range rows { for _, value := range row { columnIndex := value.Column() buf.colbuf[columnIndex] = append(buf.colbuf[columnIndex], value) } } for columnIndex, values := range buf.colbuf { if _, err := buf.columns[columnIndex].WriteValues(values); err != nil { // TODO: an error at this stage will leave the buffer in an invalid // state since the row was partially written. Applications are not // expected to continue using the buffer after getting an error, // maybe we can enforce it? return 0, err } } return len(rows), nil } // WriteRowGroup satisfies the RowGroupWriter interface. 
// WriteRowGroup copies all rows of rowGroup into the buffer and returns the
// number of rows copied, after validating that the source schema and sorting
// columns are compatible with the buffer's own configuration.
func (buf *Buffer) WriteRowGroup(rowGroup RowGroup) (int64, error) {
	rowGroupSchema := rowGroup.Schema()
	switch {
	case rowGroupSchema == nil:
		return 0, ErrRowGroupSchemaMissing
	case buf.schema == nil:
		// The buffer has no schema yet: lazily adopt the schema of the first
		// row group written to it.
		buf.configure(rowGroupSchema)
	case !nodesAreEqual(buf.schema, rowGroupSchema):
		return 0, ErrRowGroupSchemaMismatch
	}
	// The source must be at least as sorted as the buffer expects, otherwise
	// copying its rows would break the buffer's ordering invariant.
	if !sortingColumnsHavePrefix(rowGroup.SortingColumns(), buf.SortingColumns()) {
		return 0, ErrRowGroupSortingColumnsMismatch
	}
	// Record the row count before the copy so the number of rows written can
	// be derived from the difference, even if CopyRows fails part way.
	n := buf.NumRows()
	r := rowGroup.Rows()
	defer r.Close()
	_, err := CopyRows(bufferWriter{buf}, r)
	return buf.NumRows() - n, err
}

// Rows returns a reader exposing the current content of the buffer.
//
// The buffer and the returned reader share memory. Mutating the buffer
// concurrently to reading rows may result in non-deterministic behavior.
func (buf *Buffer) Rows() Rows { return newRowGroupRows(buf, ReadModeSync) }

// bufferWriter is an adapter for Buffer which implements both RowWriter and
// PageWriter to enable optimizations in CopyRows for types that support writing
// rows by copying whole pages instead of calling WriteRow repeatedly.
type bufferWriter struct{ buf *Buffer }

// WriteRows forwards to the underlying buffer's WriteRows method.
func (w bufferWriter) WriteRows(rows []Row) (int, error) {
	return w.buf.WriteRows(rows)
}

// WriteValues writes values directly to the column buffer they belong to.
// All values in the slice are assumed to target the same column (the column
// of the first value is used for routing).
func (w bufferWriter) WriteValues(values []Value) (int, error) {
	return w.buf.columns[values[0].Column()].WriteValues(values)
}

// WritePage copies a whole page of values into the matching column buffer,
// which is the fast path used by CopyRows.
func (w bufferWriter) WritePage(page Page) (int64, error) {
	return CopyValues(w.buf.columns[page.Column()], page.Values())
}

// Compile-time interface satisfaction checks.
var (
	_ RowGroup       = (*Buffer)(nil)
	_ RowGroupWriter = (*Buffer)(nil)
	_ sort.Interface = (*Buffer)(nil)

	_ RowWriter   = (*bufferWriter)(nil)
	_ PageWriter  = (*bufferWriter)(nil)
	_ ValueWriter = (*bufferWriter)(nil)
)

// buffer is a reference-counted byte buffer managed by a bufferPool.
type buffer struct {
	data []byte      // the buffer's backing storage
	refc uintptr     // reference count, manipulated atomically
	pool *bufferPool // pool to return the buffer to when the count reaches zero (may be nil)
	// stack records the call stack of the last get when debug.TRACEBUF is
	// enabled, to help locate leaked buffers.
	stack []byte
}

// refCount atomically loads the current reference count.
func (b *buffer) refCount() int {
	return int(atomic.LoadUintptr(&b.refc))
}

// ref atomically increments the reference count.
func (b *buffer) ref() {
	atomic.AddUintptr(&b.refc, +1)
}

// unref atomically decrements the reference count (adding ^uintptr(0), i.e.
// -1), returning the buffer to its pool when the count drops to zero.
func (b *buffer) unref() {
	if atomic.AddUintptr(&b.refc, ^uintptr(0)) == 0 {
		if b.pool != nil {
			b.pool.put(b)
		}
	}
}

// monitorBufferRelease is installed as a finalizer when debug.TRACEBUF is
// enabled; it reports buffers garbage collected while still referenced.
func monitorBufferRelease(b *buffer) {
	if rc := b.refCount(); rc != 0 {
		log.Printf("PARQUETGODEBUG: buffer garbage collected with non-zero reference count\n%s", string(b.stack))
	}
}

type bufferPool struct {
	// Buckets are split in two groups for short and large buffers. In the short
	// buffer group (below 256KB), the growth rate between each bucket is 2. The
	// growth rate changes to 1.5 in the larger buffer group.
	//
	// Short buffer buckets:
	// ---------------------
	//   4K, 8K, 16K, 32K, 64K, 128K, 256K
	//
	// Large buffer buckets:
	// ---------------------
	//   364K, 546K, 819K ...
	//
	buckets [bufferPoolBucketCount]sync.Pool
}

// newBuffer allocates a fresh buffer of length bufferSize with capacity
// bucketSize (the size of the pool bucket it will be returned to), starting
// with a reference count of 1.
func (p *bufferPool) newBuffer(bufferSize, bucketSize int) *buffer {
	b := &buffer{
		data: make([]byte, bufferSize, bucketSize),
		refc: 1,
		pool: p,
	}
	if debug.TRACEBUF > 0 {
		b.stack = make([]byte, 4096)
		runtime.SetFinalizer(b, monitorBufferRelease)
	}
	return b
}

// get returns a buffer from the levelled buffer pool. size is used to choose
// the appropriate pool.
func (p *bufferPool) get(bufferSize int) *buffer {
	bucketIndex, bucketSize := bufferPoolBucketIndexAndSizeOfGet(bufferSize)

	// Try to reuse a pooled buffer; bucketIndex is negative when the request
	// is larger than the biggest bucket, in which case the buffer is
	// allocated outside of the pool.
	b := (*buffer)(nil)
	if bucketIndex >= 0 {
		b, _ = p.buckets[bucketIndex].Get().(*buffer)
	}

	if b == nil {
		b = p.newBuffer(bufferSize, bucketSize)
	} else {
		// Reused buffers come out of the pool with a zero reference count:
		// trim to the requested length and take a reference.
		b.data = b.data[:bufferSize]
		b.ref()
	}

	if debug.TRACEBUF > 0 {
		// Capture the caller's stack so leaked buffers can be traced back.
		b.stack = b.stack[:runtime.Stack(b.stack[:cap(b.stack)], false)]
	}
	return b
}

// put returns a buffer to the pool. It must only be called when the buffer's
// reference count has reached zero (unref does this automatically).
func (p *bufferPool) put(b *buffer) {
	if b.pool != p {
		panic("BUG: buffer returned to a different pool than the one it was allocated from")
	}
	if b.refCount() != 0 {
		panic("BUG: buffer returned to pool with a non-zero reference count")
	}
	// Oversized buffers (bucketIndex < 0) are simply dropped and left to the
	// garbage collector.
	if bucketIndex, _ := bufferPoolBucketIndexAndSizeOfPut(cap(b.data)); bucketIndex >= 0 {
		p.buckets[bucketIndex].Put(b)
	}
}

const (
	// Number of buckets in a bufferPool.
	bufferPoolBucketCount = 32
	// Size of the smallest bucket (4 KiB).
	bufferPoolMinSize = 4096
	// Largest bucket of the "short" group (256 KiB); growth rate switches
	// from 2x to 1.5x beyond this size.
	bufferPoolLastShortBucketSize = 262144
)

// bufferPoolNextSize returns the size of the bucket following one of the
// given size: doubling below the short-bucket limit, growing by 1.5x above.
func bufferPoolNextSize(size int) int {
	if size < bufferPoolLastShortBucketSize {
		return size * 2
	} else {
		return size + (size / 2)
	}
}

// bufferPoolBucketIndexAndSizeOfGet returns the index and size of the
// smallest bucket able to hold size bytes, or (-1, size) when size exceeds
// the largest bucket.
func bufferPoolBucketIndexAndSizeOfGet(size int) (int, int) {
	limit := bufferPoolMinSize

	for i := 0; i < bufferPoolBucketCount; i++ {
		if size <= limit {
			return i, limit
		}
		limit = bufferPoolNextSize(limit)
	}

	return -1, size
}

// bufferPoolBucketIndexAndSizeOfPut returns the index and size of the bucket
// a buffer of the given capacity should be returned to, or (-1, size) when
// the capacity is below the smallest bucket or above the largest.
func bufferPoolBucketIndexAndSizeOfPut(size int) (int, int) {
	// When releasing buffers, some may have a capacity that is not one of the
	// bucket sizes (due to the use of append for example). In this case, we
	// have to put the buffer is the highest bucket with a size less or equal
	// to the buffer capacity.
	if limit := bufferPoolMinSize; size >= limit {
		for i := 0; i < bufferPoolBucketCount; i++ {
			n := bufferPoolNextSize(limit)
			if size < n {
				return i, limit
			}
			limit = n
		}
	}
	return -1, size
}

// buffers is the package-level pool shared by all readers and writers.
var (
	buffers bufferPool
)

// bufferedPage wraps a Page together with the reference-counted buffers
// backing its values, offsets, and repetition/definition levels, so that the
// underlying memory can be returned to the pool when the page is released.
type bufferedPage struct {
	Page
	values           *buffer
	offsets          *buffer
	repetitionLevels *buffer
	definitionLevels *buffer
}

// newBufferedPage wraps page and takes a reference on each non-nil buffer.
func newBufferedPage(page Page, values, offsets, definitionLevels, repetitionLevels *buffer) *bufferedPage {
	p := &bufferedPage{
		Page:             page,
		values:           values,
		offsets:          offsets,
		definitionLevels: definitionLevels,
		repetitionLevels: repetitionLevels,
	}
	bufferRef(values)
	bufferRef(offsets)
	bufferRef(definitionLevels)
	bufferRef(repetitionLevels)
	return p
}

// Slice returns a page exposing rows i to j, sharing (and re-referencing)
// the same underlying buffers.
func (p *bufferedPage) Slice(i, j int64) Page {
	return newBufferedPage(
		p.Page.Slice(i, j),
		p.values,
		p.offsets,
		p.definitionLevels,
		p.repetitionLevels,
	)
}

// Retain increments the reference count of all buffers backing the page.
func (p *bufferedPage) Retain() {
	bufferRef(p.values)
	bufferRef(p.offsets)
	bufferRef(p.definitionLevels)
	bufferRef(p.repetitionLevels)
}

// Release decrements the reference count of all buffers backing the page,
// returning them to their pool when the counts reach zero.
func (p *bufferedPage) Release() {
	bufferUnref(p.values)
	bufferUnref(p.offsets)
	bufferUnref(p.definitionLevels)
	bufferUnref(p.repetitionLevels)
}

// bufferRef is a nil-safe buffer.ref.
func bufferRef(buf *buffer) {
	if buf != nil {
		buf.ref()
	}
}

// bufferUnref is a nil-safe buffer.unref.
func bufferUnref(buf *buffer) {
	if buf != nil {
		buf.unref()
	}
}

// Retain is a helper function to increment the reference counter of pages
// backed by memory which can be granularly managed by the application.
//
// Usage of this function is optional and with Release, is intended to allow
// finer grain memory management in the application. Most programs should be
// able to rely on automated memory management provided by the Go garbage
// collector instead.
//
// The function should be called when a page lifetime is about to be shared
// between multiple goroutines or layers of an application, and the program
// wants to express "sharing ownership" of the page.
//
// Calling this function on pages that do not embed a reference counter does
// nothing.
func Retain(page Page) { if p, _ := page.(retainable); p != nil { p.Retain() } } // Release is a helper function to decrement the reference counter of pages // backed by memory which can be granularly managed by the application. // // Usage of this is optional and with Retain, is intended to allow finer grained // memory management in the application, at the expense of potentially causing // panics if the page is used after its reference count has reached zero. Most // programs should be able to rely on automated memory management provided by // the Go garbage collector instead. // // The function should be called to return a page to the internal buffer pool, // when a goroutine "releases ownership" it acquired either by being the single // owner (e.g. capturing the return value from a ReadPage call) or having gotten // shared ownership by calling Retain. // // Calling this function on pages that do not embed a reference counter does // nothing. func Release(page Page) { if p, _ := page.(releasable); p != nil { p.Release() } } type retainable interface { Retain() } type releasable interface { Release() } var ( _ retainable = (*bufferedPage)(nil) _ releasable = (*bufferedPage)(nil) ) ================================================ FILE: buffer_go18.go ================================================ //go:build go1.18 package parquet import ( "reflect" "sort" ) // GenericBuffer is similar to a Buffer but uses a type parameter to define the // Go type representing the schema of rows in the buffer. // // See GenericWriter for details about the benefits over the classic Buffer API. type GenericBuffer[T any] struct { base Buffer write bufferFunc[T] } // NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write // rows of Go type T. // // The type parameter T should be a map, struct, or any. Any other types will // cause a panic at runtime. 
Type checking is a lot more effective when the // generic parameter is a struct type, using map and interface types is somewhat // similar to using a Writer. If using an interface type for the type parameter, // then providing a schema at instantiation is required. // // If the option list may explicitly declare a schema, it must be compatible // with the schema generated from T. func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] { config, err := NewRowGroupConfig(options...) if err != nil { panic(err) } t := typeOf[T]() if config.Schema == nil && t != nil { config.Schema = schemaOf(dereference(t)) } if config.Schema == nil { panic("generic buffer must be instantiated with schema or concrete type.") } buf := &GenericBuffer[T]{ base: Buffer{config: config}, } buf.base.configure(config.Schema) buf.write = bufferFuncOf[T](t, config.Schema) return buf } func typeOf[T any]() reflect.Type { var v T return reflect.TypeOf(v) } type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error) func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] { if t == nil { return (*GenericBuffer[T]).writeRows } switch t.Kind() { case reflect.Interface, reflect.Map: return (*GenericBuffer[T]).writeRows case reflect.Struct: return makeBufferFunc[T](t, schema) case reflect.Pointer: if e := t.Elem(); e.Kind() == reflect.Struct { return makeBufferFunc[T](t, schema) } } panic("cannot create buffer for values of type " + t.String()) } func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] { writeRows := writeRowsFuncOf(t, schema, nil) return func(buf *GenericBuffer[T], rows []T) (n int, err error) { err = writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{}) if err == nil { n = len(rows) } return n, err } } func (buf *GenericBuffer[T]) Size() int64 { return buf.base.Size() } func (buf *GenericBuffer[T]) NumRows() int64 { return buf.base.NumRows() } func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk { return 
buf.base.ColumnChunks() } func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer { return buf.base.ColumnBuffers() } func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn { return buf.base.SortingColumns() } func (buf *GenericBuffer[T]) Len() int { return buf.base.Len() } func (buf *GenericBuffer[T]) Less(i, j int) bool { return buf.base.Less(i, j) } func (buf *GenericBuffer[T]) Swap(i, j int) { buf.base.Swap(i, j) } func (buf *GenericBuffer[T]) Reset() { buf.base.Reset() } func (buf *GenericBuffer[T]) Write(rows []T) (int, error) { if len(rows) == 0 { return 0, nil } return buf.write(buf, rows) } func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) { return buf.base.WriteRows(rows) } func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) { return buf.base.WriteRowGroup(rowGroup) } func (buf *GenericBuffer[T]) Rows() Rows { return buf.base.Rows() } func (buf *GenericBuffer[T]) Schema() *Schema { return buf.base.Schema() } func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) { if cap(buf.base.rowbuf) < len(rows) { buf.base.rowbuf = make([]Row, len(rows)) } else { buf.base.rowbuf = buf.base.rowbuf[:len(rows)] } defer clearRows(buf.base.rowbuf) schema := buf.base.Schema() for i := range rows { buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i]) } return buf.base.WriteRows(buf.base.rowbuf) } var ( _ RowGroup = (*GenericBuffer[any])(nil) _ RowGroupWriter = (*GenericBuffer[any])(nil) _ sort.Interface = (*GenericBuffer[any])(nil) _ RowGroup = (*GenericBuffer[struct{}])(nil) _ RowGroupWriter = (*GenericBuffer[struct{}])(nil) _ sort.Interface = (*GenericBuffer[struct{}])(nil) _ RowGroup = (*GenericBuffer[map[struct{}]struct{}])(nil) _ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil) _ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil) ) ================================================ FILE: buffer_go18_test.go ================================================ //go:build go1.18 
package parquet_test import ( "encoding/binary" "errors" "fmt" "io" "math/rand" "reflect" "sort" "testing" "github.com/segmentio/parquet-go" ) func TestGenericBuffer(t *testing.T) { testGenericBuffer[booleanColumn](t) testGenericBuffer[int32Column](t) testGenericBuffer[int64Column](t) testGenericBuffer[int96Column](t) testGenericBuffer[floatColumn](t) testGenericBuffer[doubleColumn](t) testGenericBuffer[byteArrayColumn](t) testGenericBuffer[fixedLenByteArrayColumn](t) testGenericBuffer[stringColumn](t) testGenericBuffer[indexedStringColumn](t) testGenericBuffer[uuidColumn](t) testGenericBuffer[timeColumn](t) testGenericBuffer[timeInMillisColumn](t) testGenericBuffer[mapColumn](t) testGenericBuffer[decimalColumn](t) testGenericBuffer[addressBook](t) testGenericBuffer[contact](t) testGenericBuffer[listColumn2](t) testGenericBuffer[listColumn1](t) testGenericBuffer[listColumn0](t) testGenericBuffer[nestedListColumn1](t) testGenericBuffer[nestedListColumn](t) testGenericBuffer[*contact](t) testGenericBuffer[paddedBooleanColumn](t) testGenericBuffer[optionalInt32Column](t) testGenericBuffer[repeatedInt32Column](t) } func testGenericBuffer[Row any](t *testing.T) { var model Row t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) { err := quickCheck(func(rows []Row) bool { if len(rows) == 0 { return true // TODO: fix support for parquet files with zero rows } if err := testGenericBufferRows(rows); err != nil { t.Error(err) return false } return true }) if err != nil { t.Error(err) } }) } func testGenericBufferRows[Row any](rows []Row) error { setNullPointers(rows) buffer := parquet.NewGenericBuffer[Row]() _, err := buffer.Write(rows) if err != nil { return err } reader := parquet.NewGenericRowGroupReader[Row](buffer) result := make([]Row, len(rows)) n, err := reader.Read(result) if err != nil && !errors.Is(err, io.EOF) { return err } if n < len(rows) { return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n) } if !reflect.DeepEqual(rows, result) { 
return fmt.Errorf("rows mismatch:\nwant: %#v\ngot: %#v", rows, result) } return nil } func setNullPointers[Row any](rows []Row) { if len(rows) > 0 && reflect.TypeOf(rows[0]).Kind() == reflect.Pointer { for i := range rows { v := reflect.ValueOf(&rows[i]).Elem() if v.IsNil() { v.Set(reflect.New(v.Type().Elem())) } } } } type generator[T any] interface { generate(*rand.Rand) T } func BenchmarkGenericBuffer(b *testing.B) { benchmarkGenericBuffer[benchmarkRowType](b) benchmarkGenericBuffer[booleanColumn](b) benchmarkGenericBuffer[int32Column](b) benchmarkGenericBuffer[int64Column](b) benchmarkGenericBuffer[floatColumn](b) benchmarkGenericBuffer[doubleColumn](b) benchmarkGenericBuffer[byteArrayColumn](b) benchmarkGenericBuffer[fixedLenByteArrayColumn](b) benchmarkGenericBuffer[stringColumn](b) benchmarkGenericBuffer[indexedStringColumn](b) benchmarkGenericBuffer[uuidColumn](b) benchmarkGenericBuffer[timeColumn](b) benchmarkGenericBuffer[timeInMillisColumn](b) benchmarkGenericBuffer[mapColumn](b) benchmarkGenericBuffer[decimalColumn](b) benchmarkGenericBuffer[contact](b) benchmarkGenericBuffer[paddedBooleanColumn](b) benchmarkGenericBuffer[optionalInt32Column](b) benchmarkGenericBuffer[repeatedInt32Column](b) } func benchmarkGenericBuffer[Row generator[Row]](b *testing.B) { var model Row b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) { prng := rand.New(rand.NewSource(0)) rows := make([]Row, benchmarkNumRows) for i := range rows { rows[i] = rows[i].generate(prng) } b.Run("go1.17", func(b *testing.B) { buffer := parquet.NewBuffer(parquet.SchemaOf(rows[0])) i := 0 benchmarkRowsPerSecond(b, func() int { for j := 0; j < benchmarkRowsPerStep; j++ { if err := buffer.Write(&rows[i]); err != nil { b.Fatal(err) } } i += benchmarkRowsPerStep i %= benchmarkNumRows if i == 0 { buffer.Reset() } return benchmarkRowsPerStep }) }) b.Run("go1.18", func(b *testing.B) { buffer := parquet.NewGenericBuffer[Row]() i := 0 benchmarkRowsPerSecond(b, func() int { n, err := 
buffer.Write(rows[i : i+benchmarkRowsPerStep]) if err != nil { b.Fatal(err) } i += benchmarkRowsPerStep i %= benchmarkNumRows if i == 0 { buffer.Reset() } return n }) }) }) } func TestIssue327(t *testing.T) { t.Run("untagged nested lists should panic", func(t *testing.T) { type testType struct { ListOfLists [][]int } defer func() { if r := recover(); r == nil { t.Errorf("Nested lists without the list tag should panic") } }() _ = parquet.NewGenericBuffer[testType]() }) } func TestIssue346(t *testing.T) { type TestType struct { Key int } schema := parquet.SchemaOf(TestType{}) buffer := parquet.NewGenericBuffer[any](schema) data := make([]any, 1) data[0] = TestType{Key: 0} _, _ = buffer.Write(data) } func TestIssue347(t *testing.T) { type TestType struct { Key int } // instantiating with concrete type shouldn't panic _ = parquet.NewGenericBuffer[TestType]() // instantiating with schema and interface type parameter shouldn't panic schema := parquet.SchemaOf(TestType{}) _ = parquet.NewGenericBuffer[any](schema) defer func() { if r := recover(); r == nil { t.Errorf("instantiating generic buffer without schema and with interface " + "type parameter should panic") } }() _ = parquet.NewGenericBuffer[any]() } func BenchmarkSortGenericBuffer(b *testing.B) { type Row struct { I0 int64 I1 int64 I2 int64 I3 int64 I4 int64 I5 int64 I6 int64 I7 int64 I8 int64 I9 int64 ID [16]byte } buf := parquet.NewGenericBuffer[Row]( parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("ID"), ), ), ) rows := make([]Row, 10e3) prng := rand.New(rand.NewSource(0)) for i := range rows { binary.LittleEndian.PutUint64(rows[i].ID[:8], uint64(i)) binary.LittleEndian.PutUint64(rows[i].ID[8:], ^uint64(i)) } buf.Write(rows) b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < 10; j++ { buf.Swap(prng.Intn(len(rows)), prng.Intn(len(rows))) } sort.Sort(buf) } } ================================================ FILE: buffer_internal_test.go 
================================================
package parquet

import (
	"fmt"
	"math/rand"
	"testing"
)

// TestBufferAlwaysCorrectSize checks that the internal bufferPool hands out
// buffers whose data length is exactly the requested size, across a wide
// range of random sizes.
func TestBufferAlwaysCorrectSize(t *testing.T) {
	var p bufferPool
	for i := 0; i < 1000; i++ {
		n := rand.Intn(1024 * 1024)
		b := p.get(n)
		if len(b.data) != n {
			t.Fatalf("Expected buffer of size %d, got %d", n, len(b.data))
		}
		b.unref()
	}
}

// TestBufferPoolBucketIndexAndSizeOf verifies the mapping from a requested
// buffer size to the pool bucket index and that bucket's capacity.
func TestBufferPoolBucketIndexAndSizeOf(t *testing.T) {
	tests := []struct {
		size        int
		bucketIndex int
		bucketSize  int
	}{
		// Sizes up to 4096 all map to the first bucket.
		{size: 0, bucketIndex: 0, bucketSize: 4096},
		{size: 1, bucketIndex: 0, bucketSize: 4096},
		{size: 2049, bucketIndex: 0, bucketSize: 4096},
		{size: 4096, bucketIndex: 0, bucketSize: 4096},
		// One byte past a bucket boundary moves to the next bucket.
		{size: 4097, bucketIndex: 1, bucketSize: 8192},
		{size: 8192, bucketIndex: 1, bucketSize: 8192},
		{size: 8193, bucketIndex: 2, bucketSize: 16384},
		{size: 16384, bucketIndex: 2, bucketSize: 16384},
		{size: 16385, bucketIndex: 3, bucketSize: 32768},
		{size: 32768, bucketIndex: 3, bucketSize: 32768},
		{size: 32769, bucketIndex: 4, bucketSize: 65536},
		{size: 262143, bucketIndex: 6, bucketSize: 262144},
		{size: 262144, bucketIndex: 6, bucketSize: 262144},
		{size: 262145, bucketIndex: 7, bucketSize: 393216},
	}
	for _, test := range tests {
		t.Run(fmt.Sprintf("size=%d", test.size), func(t *testing.T) {
			bucketIndex, bucketSize := bufferPoolBucketIndexAndSizeOfGet(test.size)
			if bucketIndex != test.bucketIndex {
				t.Errorf("wrong bucket index, want %d but got %d", test.bucketIndex, bucketIndex)
			}
			if bucketSize != test.bucketSize {
				t.Errorf("wrong bucket size, want %d but got %d", test.bucketSize, bucketSize)
			}
		})
	}
}

================================================
FILE: buffer_pool.go
================================================
package parquet

import (
	"fmt"
	"io"
	"os"
	"path/filepath"
	"sync"
)

// BufferPool is an interface abstracting the underlying implementation of
// page buffer pools.
//
// The parquet-go package provides two implementations of this interface, one
// backed by in-memory buffers (on the Go heap), and the other using temporary
// files on disk.
//
// Applications which need finer grain control over the allocation and retention
// of page buffers may choose to provide their own implementation and install it
// via the parquet.ColumnPageBuffers writer option.
//
// BufferPool implementations must be safe to use concurrently from multiple
// goroutines.
type BufferPool interface {
	// GetBuffer is called when a parquet writer needs to acquire a new
	// page buffer from the pool.
	GetBuffer() io.ReadWriteSeeker

	// PutBuffer is called when a parquet writer releases a page buffer to
	// the pool.
	//
	// The parquet.Writer type guarantees that the buffers it calls this method
	// with were previously acquired by a call to GetBuffer on the same
	// pool, and that it will not use them anymore after the call.
	PutBuffer(io.ReadWriteSeeker)
}

// NewBufferPool creates a new in-memory page buffer pool.
//
// The implementation is backed by sync.Pool and allocates memory buffers on the
// Go heap.
func NewBufferPool() BufferPool { return new(memoryBufferPool) }

// memoryBuffer is an in-memory page buffer implementing io.ReadWriteSeeker,
// with a single offset shared by reads and writes.
type memoryBuffer struct {
	data []byte // buffered content
	off  int    // current read/write offset within data
}

// Reset truncates the buffer and rewinds the offset, retaining the backing
// array for reuse.
func (p *memoryBuffer) Reset() { p.data, p.off = p.data[:0], 0 }

// Read copies buffered bytes starting at the current offset, returning io.EOF
// once the offset reaches the end of the data.
func (p *memoryBuffer) Read(b []byte) (n int, err error) {
	n = copy(b, p.data[p.off:])
	p.off += n
	if p.off == len(p.data) {
		err = io.EOF
	}
	return n, err
}

// Write copies b into the buffer at the current offset, first filling the
// spare capacity of the backing array and then growing it if needed.
func (p *memoryBuffer) Write(b []byte) (int, error) {
	// Overwrite in place up to the existing capacity.
	n := copy(p.data[p.off:cap(p.data)], b)
	p.data = p.data[:p.off+n]
	if n < len(b) {
		// Not enough capacity for the whole write; append the remainder,
		// letting append grow the backing array.
		p.data = append(p.data, b[n:]...)
	}
	p.off += len(b)
	return len(b), nil
}

// WriteTo writes the unread portion of the buffer to w, advancing the offset
// by the number of bytes written.
func (p *memoryBuffer) WriteTo(w io.Writer) (int64, error) {
	n, err := w.Write(p.data[p.off:])
	p.off += n
	return int64(n), err
}

// Seek sets the read/write offset. Offsets past the end of the buffered data
// are clamped to the end; only negative resulting offsets are an error.
func (p *memoryBuffer) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekCurrent:
		offset += int64(p.off)
	case io.SeekEnd:
		offset += int64(len(p.data))
	}
	if offset < 0 {
		return 0, fmt.Errorf("seek: negative offset: %d<0", offset)
	}
	if offset > int64(len(p.data)) {
		offset = int64(len(p.data))
	}
	p.off = int(offset)
	return offset, nil
}

// memoryBufferPool is the in-memory BufferPool implementation returned by
// NewBufferPool, backed by a sync.Pool of *memoryBuffer values.
type memoryBufferPool struct{ sync.Pool }

// GetBuffer returns a reset memoryBuffer, reusing a pooled one when available.
func (pool *memoryBufferPool) GetBuffer() io.ReadWriteSeeker {
	b, _ := pool.Get().(*memoryBuffer)
	if b == nil {
		b = new(memoryBuffer)
	} else {
		b.Reset()
	}
	return b
}

// PutBuffer returns buf to the pool; values of any other concrete type are
// silently dropped.
func (pool *memoryBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
	if b, _ := buf.(*memoryBuffer); b != nil {
		pool.Put(b)
	}
}

// fileBufferPool is the on-disk BufferPool implementation returned by
// NewFileBufferPool. A configuration error (from resolving tempdir) is
// recorded in err and surfaced lazily by GetBuffer.
type fileBufferPool struct {
	err     error
	tempdir string
	pattern string
}

// NewFileBufferPool creates a new on-disk page buffer pool.
func NewFileBufferPool(tempdir, pattern string) BufferPool {
	pool := &fileBufferPool{
		tempdir: tempdir,
		pattern: pattern,
	}
	pool.tempdir, pool.err = filepath.Abs(pool.tempdir)
	return pool
}

// GetBuffer creates a temporary file backing the page buffer. Because the
// interface has no error return, failures are reported by handing back an
// errorBuffer whose operations all fail with the recorded error.
func (pool *fileBufferPool) GetBuffer() io.ReadWriteSeeker {
	if pool.err != nil {
		return &errorBuffer{err: pool.err}
	}
	f, err := os.CreateTemp(pool.tempdir, pool.pattern)
	if err != nil {
		return &errorBuffer{err: err}
	}
	return f
}

// PutBuffer closes and removes the temporary file backing buf.
func (pool *fileBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
	if f, _ := buf.(*os.File); f != nil {
		defer f.Close()
		os.Remove(f.Name())
	}
}

// errorBuffer is a page buffer stand-in whose every operation fails with a
// fixed error.
type errorBuffer struct{ err error }

func (buf *errorBuffer) Read([]byte) (int, error)          { return 0, buf.err }
func (buf *errorBuffer) Write([]byte) (int, error)         { return 0, buf.err }
func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0, buf.err }
func (buf *errorBuffer) WriteTo(io.Writer) (int64, error)  { return 0, buf.err }
func (buf *errorBuffer) Seek(int64, int) (int64, error)    { return 0, buf.err }

var (
	defaultColumnBufferPool  memoryBufferPool
	defaultSortingBufferPool memoryBufferPool

	_ io.ReaderFrom = (*errorBuffer)(nil)
	_ io.WriterTo   = (*errorBuffer)(nil)
	_ io.WriterTo   = (*memoryBuffer)(nil)
)

// readerAt adapts an io.ReadSeeker to the io.ReaderAt interface, seeking only
// when the requested offset differs from the last known position.
type readerAt struct {
	reader io.ReadSeeker
	offset int64 // last known position; negative means unknown, forcing a seek
}

// ReadAt implements io.ReaderAt by seeking to off when necessary and reading
// from the underlying reader, tracking the position across calls.
func (r *readerAt) ReadAt(b []byte, off int64) (int, error) {
	if r.offset < 0 || off != r.offset {
		off, err := r.reader.Seek(off, io.SeekStart)
		if err != nil {
			return 0, err
		}
		r.offset = off
	}
	n, err := r.reader.Read(b)
	r.offset += int64(n)
	return n, err
}

// newReaderAt returns r itself when it already implements io.ReaderAt,
// otherwise it wraps r in a readerAt adapter.
func newReaderAt(r io.ReadSeeker) io.ReaderAt {
	if rr, ok := r.(io.ReaderAt); ok {
		return rr
	}
	return &readerAt{reader: r, offset: -1}
}

================================================
FILE: buffer_pool_test.go
================================================
package parquet_test

import (
	"bytes"
	"io"
	"strings"
	"testing"
	"testing/iotest"

	"github.com/segmentio/parquet-go"
)

func TestBufferPool(t *testing.T) { testBufferPool(t, parquet.NewBufferPool()) }

func TestFileBufferPool(t *testing.T) { testBufferPool(t, parquet.NewFileBufferPool("/tmp", "buffers.*")) }

// testBufferPool runs the shared buffer pool scenarios against pool.
func testBufferPool(t *testing.T, pool parquet.BufferPool) {
	tests := []struct {
		scenario string
		function func(*testing.T, parquet.BufferPool)
	}{
		{
			scenario: "write bytes",
			function: testBufferPoolWriteBytes,
		},
		{
			scenario: "write string",
			function: testBufferPoolWriteString,
		},
		{
			scenario: "copy to buffer",
			function: testBufferPoolCopyToBuffer,
		},
		{
			scenario: "copy from buffer",
			function: testBufferPoolCopyFromBuffer,
		},
	}
	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) { test.function(t, pool) })
	}
}

func testBufferPoolWriteBytes(t *testing.T, pool parquet.BufferPool) {
	const content = "Hello World!"
	buffer := pool.GetBuffer()
	defer pool.PutBuffer(buffer)
	_, err := buffer.Write([]byte(content))
	if err != nil {
		t.Fatal(err)
	}
	assertBufferContent(t, buffer, content)
}

func testBufferPoolWriteString(t *testing.T, pool parquet.BufferPool) {
	const content = "Hello World!"
buffer := pool.GetBuffer() defer pool.PutBuffer(buffer) _, err := io.WriteString(buffer, content) if err != nil { t.Fatal(err) } assertBufferContent(t, buffer, content) } func testBufferPoolCopyToBuffer(t *testing.T, pool parquet.BufferPool) { const content = "ABC" buffer := pool.GetBuffer() defer pool.PutBuffer(buffer) reader := strings.NewReader(content) _, err := io.Copy(buffer, struct{ io.Reader }{reader}) if err != nil { t.Fatal(err) } assertBufferContent(t, buffer, content) } func testBufferPoolCopyFromBuffer(t *testing.T, pool parquet.BufferPool) { const content = "0123456789" buffer := pool.GetBuffer() defer pool.PutBuffer(buffer) if _, err := io.WriteString(buffer, content); err != nil { t.Fatal(err) } if _, err := buffer.Seek(0, io.SeekStart); err != nil { t.Fatal(err) } writer := new(bytes.Buffer) _, err := io.Copy(struct{ io.Writer }{writer}, buffer) if err != nil { t.Fatal(err) } assertBufferContent(t, bytes.NewReader(writer.Bytes()), content) } func assertBufferContent(t *testing.T, b io.ReadSeeker, s string) { t.Helper() offset, err := b.Seek(0, io.SeekStart) if err != nil { t.Error("seek:", err) } if offset != 0 { t.Errorf("seek: invalid offset returned: want=0 got=%d", offset) } if err := iotest.TestReader(b, []byte(s)); err != nil { t.Error("iotest:", err) } } ================================================ FILE: buffer_test.go ================================================ package parquet_test import ( "bytes" "errors" "io" "math" "math/rand" "reflect" "sort" "strconv" "testing" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/encoding" ) var bufferTests = [...]struct { scenario string typ parquet.Type values [][]interface{} }{ { scenario: "boolean", typ: parquet.BooleanType, values: [][]interface{}{ {}, {false}, {true}, { false, true, false, false, true, true, false, false, false, true, false, true, }, }, }, { scenario: "int32", typ: parquet.Int32Type, values: [][]interface{}{ {}, {int32(0)}, {int32(1)}, { int32(1), 
int32(2), int32(3), int32(4), int32(5), int32(6), int32(math.MaxInt8), int32(math.MaxInt16), int32(math.MaxInt32), int32(7), int32(9), int32(9), int32(0), }, }, }, { scenario: "int64", typ: parquet.Int64Type, values: [][]interface{}{ {}, {int64(0)}, {int64(1)}, { int64(1), int64(2), int64(3), int64(4), int64(5), int64(6), int64(math.MaxInt8), int64(math.MaxInt16), int64(math.MaxInt64), int64(7), int64(9), int64(9), int64(0), }, }, }, { scenario: "float", typ: parquet.FloatType, values: [][]interface{}{ {}, {float32(0)}, {float32(1)}, { float32(1), float32(2), float32(3), float32(4), float32(5), float32(6), float32(0.5), float32(math.SmallestNonzeroFloat32), float32(math.MaxFloat32), float32(7), float32(9), float32(9), float32(0), }, }, }, { scenario: "double", typ: parquet.DoubleType, values: [][]interface{}{ {}, {float64(0)}, {float64(1)}, { float64(1), float64(2), float64(3), float64(4), float64(5), float64(6), float64(0.5), float64(math.SmallestNonzeroFloat64), float64(math.MaxFloat64), float64(7), float64(9), float64(9), float64(0), }, }, }, { scenario: "string", typ: parquet.ByteArrayType, values: [][]interface{}{ {}, {""}, {"Hello World!"}, { "ABCDEFG", "HIJKLMN", "OPQRSTU", "VWXZY01", "2345678", "90!@#$%", "^&*()_+", "Hello World!", "Answer=42", "ABCEDFG", "HIJKLMN", "OPQRSTU", "VWXYZ", }, }, }, { scenario: "fixed length byte array", typ: parquet.FixedLenByteArrayType(10), values: [][]interface{}{ {}, {[10]byte{}}, {[10]byte{0: 1}}, { [10]byte{0: 0}, [10]byte{0: 2}, [10]byte{0: 1}, [10]byte{0: 4}, [10]byte{0: 3}, [10]byte{0: 6}, [10]byte{0: 5}, [10]byte{0: 8}, [10]byte{0: 7}, [10]byte{0: 10}, [10]byte{0: 11}, [10]byte{0: 12}, [10]byte{9: 0xFF}, }, }, }, { scenario: "uuid", typ: parquet.UUID().Type(), values: [][]interface{}{ {}, {[16]byte{}}, {[16]byte{0: 1}}, { [16]byte{0: 0}, [16]byte{0: 2}, [16]byte{0: 1}, [16]byte{0: 4}, [16]byte{0: 3}, [16]byte{0: 6}, [16]byte{0: 5}, [16]byte{0: 8}, [16]byte{0: 7}, [16]byte{0: 10}, [16]byte{0: 11}, [16]byte{0: 12}, 
[16]byte{15: 0xFF}, }, }, }, { scenario: "uint32", typ: parquet.Uint(32).Type(), values: [][]interface{}{ {}, {uint32(0)}, {uint32(1)}, { uint32(1), uint32(2), uint32(3), uint32(4), uint32(5), uint32(6), uint32(math.MaxInt8), uint32(math.MaxInt16), uint32(math.MaxUint32), uint32(7), uint32(9), uint32(9), uint32(0), }, }, }, { scenario: "uint64", typ: parquet.Uint(64).Type(), values: [][]interface{}{ {}, {uint64(0)}, {uint64(1)}, { uint64(1), uint64(2), uint64(3), uint64(4), uint64(5), uint64(6), uint64(math.MaxInt8), uint64(math.MaxInt16), uint64(math.MaxUint64), uint64(7), uint64(9), uint64(9), uint64(0), }, }, }, } func TestBuffer(t *testing.T) { for _, test := range bufferTests { t.Run(test.scenario, func(t *testing.T) { for _, config := range [...]struct { scenario string typ parquet.Type }{ {scenario: "plain", typ: test.typ}, {scenario: "indexed", typ: test.typ.NewDictionary(0, 0, test.typ.NewValues(nil, nil)).Type()}, } { t.Run(config.scenario, func(t *testing.T) { for _, mod := range [...]struct { scenario string function func(parquet.Node) parquet.Node }{ {scenario: "optional", function: parquet.Optional}, {scenario: "repeated", function: parquet.Repeated}, {scenario: "required", function: parquet.Required}, } { t.Run(mod.scenario, func(t *testing.T) { for _, ordering := range [...]struct { scenario string sorting parquet.SortingColumn sortFunc func(parquet.Type, []parquet.Value) }{ {scenario: "unordered", sorting: nil, sortFunc: unordered}, {scenario: "ascending", sorting: parquet.Ascending("data"), sortFunc: ascending}, {scenario: "descending", sorting: parquet.Descending("data"), sortFunc: descending}, } { t.Run(ordering.scenario, func(t *testing.T) { schema := parquet.NewSchema("test", parquet.Group{ "data": mod.function(parquet.Leaf(config.typ)), }) options := []parquet.RowGroupOption{ schema, parquet.ColumnBufferCapacity(100), } if ordering.sorting != nil { options = append(options, parquet.SortingRowGroupConfig( 
parquet.SortingColumns(ordering.sorting), ), ) } content := new(bytes.Buffer) buffer := parquet.NewBuffer(options...) for _, values := range test.values { t.Run("", func(t *testing.T) { defer content.Reset() defer buffer.Reset() fields := schema.Fields() testBuffer(t, fields[0], buffer, &parquet.Plain, values, ordering.sortFunc) }) } }) } }) } }) } }) } } type sortFunc func(parquet.Type, []parquet.Value) func unordered(typ parquet.Type, values []parquet.Value) {} func ascending(typ parquet.Type, values []parquet.Value) { sort.Slice(values, func(i, j int) bool { return typ.Compare(values[i], values[j]) < 0 }) } func descending(typ parquet.Type, values []parquet.Value) { sort.Slice(values, func(i, j int) bool { return typ.Compare(values[i], values[j]) > 0 }) } func testBuffer(t *testing.T, node parquet.Node, buffer *parquet.Buffer, encoding encoding.Encoding, values []interface{}, sortFunc sortFunc) { repetitionLevel := 0 definitionLevel := 0 if !node.Required() { definitionLevel = 1 } minValue := parquet.Value{} maxValue := parquet.Value{} batch := make([]parquet.Value, len(values)) for i := range values { batch[i] = parquet.ValueOf(values[i]).Level(repetitionLevel, definitionLevel, 0) } for i := range batch { _, err := buffer.WriteRows([]parquet.Row{batch[i : i+1]}) if err != nil { t.Fatalf("writing value to row group: %v", err) } } numRows := buffer.NumRows() if numRows != int64(len(batch)) { t.Fatalf("number of rows mismatch: want=%d got=%d", len(batch), numRows) } typ := node.Type() for _, value := range batch { if minValue.IsNull() || typ.Compare(value, minValue) < 0 { minValue = value } if maxValue.IsNull() || typ.Compare(value, maxValue) > 0 { maxValue = value } } sortFunc(typ, batch) sort.Sort(buffer) page := buffer.ColumnBuffers()[0].Page() numValues := page.NumValues() if numValues != int64(len(batch)) { t.Fatalf("number of values mistmatch: want=%d got=%d", len(batch), numValues) } numNulls := page.NumNulls() if numNulls != 0 { t.Fatalf("number of nulls 
mismatch: want=0 got=%d", numNulls) } min, max, hasBounds := page.Bounds() if !hasBounds && numRows > 0 { t.Fatal("page bounds are missing") } if !parquet.Equal(min, minValue) { t.Fatalf("min value mismatch: want=%v got=%v", minValue, min) } if !parquet.Equal(max, maxValue) { t.Fatalf("max value mismatch: want=%v got=%v", maxValue, max) } // We write a single value per row, so num values = num rows for all pages // including repeated ones, which makes it OK to slice the pages using the // number of values as a proxy for the row indexes. halfValues := numValues / 2 for _, test := range [...]struct { scenario string values []parquet.Value reader parquet.ValueReader }{ {"page", batch, page.Values()}, {"head", batch[:halfValues], page.Slice(0, halfValues).Values()}, {"tail", batch[halfValues:], page.Slice(halfValues, numValues).Values()}, } { v := [1]parquet.Value{} i := 0 for { n, err := test.reader.ReadValues(v[:]) if n > 0 { if n != 1 { t.Fatalf("reading value from %q reader returned the wrong count: want=1 got=%d", test.scenario, n) } if i < len(test.values) { if !parquet.Equal(v[0], test.values[i]) { t.Fatalf("%q value at index %d mismatches: want=%v got=%v", test.scenario, i, test.values[i], v[0]) } } i++ } if err != nil { if err == io.EOF { break } t.Fatalf("reading value from %q reader: %v", test.scenario, err) } } if i != len(test.values) { t.Errorf("wrong number of values read from %q reader: want=%d got=%d", test.scenario, len(test.values), i) } } } func TestBufferGenerateBloomFilters(t *testing.T) { type Point3D struct { X float64 Y float64 Z float64 } f := func(rows []Point3D) bool { if len(rows) == 0 { // TODO: support writing files with no rows return true } output := new(bytes.Buffer) buffer := parquet.NewBuffer() writer := parquet.NewWriter(output, parquet.BloomFilters( parquet.SplitBlockFilter(10, "X"), parquet.SplitBlockFilter(10, "Y"), parquet.SplitBlockFilter(10, "Z"), ), ) for i := range rows { buffer.Write(&rows[i]) } _, err := 
copyRowsAndClose(writer, buffer.Rows()) if err != nil { t.Error(err) return false } if err := writer.Close(); err != nil { t.Error(err) return false } reader := bytes.NewReader(output.Bytes()) f, err := parquet.OpenFile(reader, reader.Size()) if err != nil { t.Error(err) return false } rowGroup := f.RowGroups()[0] columns := rowGroup.ColumnChunks() x := columns[0] y := columns[1] z := columns[2] for i, col := range []parquet.ColumnChunk{x, y, z} { if col.BloomFilter() == nil { t.Errorf("column %d has no bloom filter despite being configured to have one", i) return false } } fx := x.BloomFilter() fy := y.BloomFilter() fz := z.BloomFilter() test := func(f parquet.BloomFilter, v float64) bool { if ok, err := f.Check(parquet.ValueOf(v)); err != nil { t.Errorf("unexpected error checking bloom filter: %v", err) return false } else if !ok { t.Errorf("bloom filter does not contain value %g", v) return false } return true } for _, row := range rows { if !test(fx, row.X) || !test(fy, row.Y) || !test(fz, row.Z) { return false } } return true } if err := quickCheck(f); err != nil { t.Error(err) } } func TestBufferRoundtripNestedRepeated(t *testing.T) { type C struct { D int } type B struct { C []C } type A struct { B []B } // Write enough objects to exceed first page buffer := parquet.NewBuffer() var objs []A for i := 0; i < 6; i++ { o := A{[]B{{[]C{ {i}, {i}, }}}} buffer.Write(&o) objs = append(objs, o) } buf := new(bytes.Buffer) w := parquet.NewWriter(buf, parquet.PageBufferSize(100)) w.WriteRowGroup(buffer) w.Flush() w.Close() file := bytes.NewReader(buf.Bytes()) r := parquet.NewReader(file) for i := 0; ; i++ { o := new(A) err := r.Read(o) if errors.Is(err, io.EOF) { if i < len(objs) { t.Errorf("too few rows were read: %d<%d", i, len(objs)) } break } if !reflect.DeepEqual(*o, objs[i]) { t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o) } } } func TestBufferRoundtripNestedRepeatedPointer(t *testing.T) { type C struct { D *int } type B struct { C []C 
} type A struct { B []B } // Write enough objects to exceed first page buffer := parquet.NewBuffer() var objs []A for i := 0; i < 6; i++ { j := i o := A{[]B{{[]C{ {&j}, {nil}, }}}} buffer.Write(&o) objs = append(objs, o) } buf := new(bytes.Buffer) w := parquet.NewWriter(buf, parquet.PageBufferSize(100)) w.WriteRowGroup(buffer) w.Flush() w.Close() file := bytes.NewReader(buf.Bytes()) r := parquet.NewReader(file) for i := 0; ; i++ { o := new(A) err := r.Read(o) if err == io.EOF { break } if !reflect.DeepEqual(*o, objs[i]) { t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o) } } } func TestRoundtripNestedRepeatedBytes(t *testing.T) { type B struct { C []byte } type A struct { A string B []B } var objs []A for i := 0; i < 2; i++ { o := A{ "test" + strconv.Itoa(i), []B{ {[]byte{byte(i)}}, }, } objs = append(objs, o) } buf := new(bytes.Buffer) w := parquet.NewWriter(buf, parquet.PageBufferSize(100)) for _, o := range objs { w.Write(&o) } w.Close() file := bytes.NewReader(buf.Bytes()) r := parquet.NewReader(file) for i := 0; ; i++ { o := new(A) err := r.Read(o) if errors.Is(err, io.EOF) { if i < len(objs) { t.Errorf("too few rows were read: %d<%d", i, len(objs)) } break } if !reflect.DeepEqual(*o, objs[i]) { t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o) } } } func TestBufferSeekToRow(t *testing.T) { type B struct { I int C []string } type A struct { B []B } buffer := parquet.NewBuffer() var objs []A for i := 0; i < 2; i++ { o := A{ B: []B{ {I: i, C: []string{"foo", strconv.Itoa(i)}}, {I: i + 1, C: []string{"bar", strconv.Itoa(i + 1)}}, }, } buffer.Write(&o) objs = append(objs, o) } buf := new(bytes.Buffer) w := parquet.NewWriter(buf) w.WriteRowGroup(buffer) w.Flush() w.Close() file := bytes.NewReader(buf.Bytes()) r := parquet.NewReader(file) i := 1 o := new(A) if err := r.SeekToRow(int64(i)); err != nil { t.Fatal(err) } if err := r.Read(o); err != nil { t.Fatal(err) } if !reflect.DeepEqual(*o, objs[i]) { 
t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o) } } type TestStruct struct { A *string `parquet:"a,optional,dict"` } func TestOptionalDictWriteRowGroup(t *testing.T) { s := parquet.SchemaOf(&TestStruct{}) str1 := "test1" str2 := "test2" records := []*TestStruct{ {A: nil}, {A: &str1}, {A: nil}, {A: &str2}, {A: nil}, } buf := parquet.NewBuffer(s) for _, rec := range records { row := s.Deconstruct(nil, rec) _, err := buf.WriteRows([]parquet.Row{row}) if err != nil { t.Fatal(err) } } b := bytes.NewBuffer(nil) w := parquet.NewWriter(b) _, err := w.WriteRowGroup(buf) if err != nil { t.Fatal(err) } } func TestNullsSortFirst(t *testing.T) { s := parquet.SchemaOf(&TestStruct{}) str1 := "test1" str2 := "test2" records := []*TestStruct{ {A: &str1}, {A: nil}, {A: &str2}, } buf := parquet.NewBuffer( s, parquet.SortingRowGroupConfig(parquet.SortingColumns(parquet.NullsFirst(parquet.Ascending(s.Columns()[0][0])))), ) for _, rec := range records { row := s.Deconstruct(nil, rec) _, err := buf.WriteRows([]parquet.Row{row}) if err != nil { t.Fatal(err) } } sort.Sort(buf) rows := buf.Rows() defer rows.Close() rowBuf := make([]parquet.Row, len(records)) if _, err := rows.ReadRows(rowBuf); err != nil { t.Fatal(err) } resultRecords := make([]TestStruct, len(records)) for i, r := range rowBuf { if err := s.Reconstruct(&resultRecords[i], r); err != nil { t.Fatal(err) } } if resultRecords[0].A != nil { t.Fatal("expected null to sort first, but found", resultRecords) } } func generateBenchmarkBufferRows(n int) (*parquet.Schema, []parquet.Row) { model := new(benchmarkRowType) schema := parquet.SchemaOf(model) prng := rand.New(rand.NewSource(0)) rows := make([]parquet.Row, n) for i := range rows { io.ReadFull(prng, model.ID[:]) model.Value = prng.Float64() rows[i] = make(parquet.Row, 0, 2) rows[i] = schema.Deconstruct(rows[i], model) } return schema, rows } func BenchmarkBufferReadRows100x(b *testing.B) { schema, rows := generateBenchmarkBufferRows(benchmarkNumRows) 
buffer := parquet.NewBuffer(schema) for i := 0; i < len(rows); i += benchmarkRowsPerStep { j := i + benchmarkRowsPerStep if _, err := buffer.WriteRows(rows[i:j]); err != nil { b.Fatal(err) } } bufferRows := buffer.Rows() defer bufferRows.Close() benchmarkRowsPerSecond(b, func() int { n, err := bufferRows.ReadRows(rows[:benchmarkRowsPerStep]) if err != nil { if errors.Is(err, io.EOF) { err = bufferRows.SeekToRow(0) } if err != nil { b.Fatal(err) } } return n }) } func BenchmarkBufferWriteRows100x(b *testing.B) { schema, rows := generateBenchmarkBufferRows(benchmarkNumRows) buffer := parquet.NewBuffer(schema) i := 0 benchmarkRowsPerSecond(b, func() int { n, err := buffer.WriteRows(rows[i : i+benchmarkRowsPerStep]) if err != nil { b.Fatal(err) } i += benchmarkRowsPerStep i %= benchmarkNumRows if i == 0 { buffer.Reset() } return n }) } ================================================ FILE: column.go ================================================ package parquet import ( "encoding/binary" "fmt" "io" "reflect" "github.com/segmentio/parquet-go/compress" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" "github.com/segmentio/parquet-go/internal/unsafecast" ) // Column represents a column in a parquet file. // // Methods of Column values are safe to call concurrently from multiple // goroutines. // // Column instances satisfy the Node interface. type Column struct { typ Type file *File schema *format.SchemaElement order *format.ColumnOrder path columnPath columns []*Column chunks []*format.ColumnChunk columnIndex []*format.ColumnIndex offsetIndex []*format.OffsetIndex encoding encoding.Encoding compression compress.Codec depth int8 maxRepetitionLevel byte maxDefinitionLevel byte index int16 } // Type returns the type of the column. // // The returned value is unspecified if c is not a leaf column. func (c *Column) Type() Type { return c.typ } // Optional returns true if the column is optional. 
func (c *Column) Optional() bool { return schemaRepetitionTypeOf(c.schema) == format.Optional }

// Repeated returns true if the column may repeat.
func (c *Column) Repeated() bool { return schemaRepetitionTypeOf(c.schema) == format.Repeated }

// Required returns true if the column is required.
func (c *Column) Required() bool { return schemaRepetitionTypeOf(c.schema) == format.Required }

// Leaf returns true if c is a leaf column.
//
// Leaf columns are the only ones assigned a non-negative column index.
func (c *Column) Leaf() bool { return c.index >= 0 }

// Fields returns the list of fields on the column.
func (c *Column) Fields() []Field {
	fields := make([]Field, len(c.columns))
	for i, column := range c.columns {
		fields[i] = column
	}
	return fields
}

// Encoding returns the encodings used by this column.
func (c *Column) Encoding() encoding.Encoding { return c.encoding }

// Compression returns the compression codecs used by this column.
func (c *Column) Compression() compress.Codec { return c.compression }

// Path of the column in the parquet schema.
//
// The first element of the internal path (the schema root) is dropped from
// the returned slice.
func (c *Column) Path() []string { return c.path[1:] }

// Name returns the column name.
func (c *Column) Name() string { return c.schema.Name }

// Columns returns the list of child columns.
//
// The method returns the same slice across multiple calls, the program must
// treat it as a read-only value.
func (c *Column) Columns() []*Column { return c.columns }

// Column returns the child column matching the given name, or nil when no
// child has that name.
func (c *Column) Column(name string) *Column {
	for _, child := range c.columns {
		if child.Name() == name {
			return child
		}
	}
	return nil
}

// Pages returns a reader exposing all pages in this column, across row groups.
func (c *Column) Pages() Pages { if c.index < 0 { return emptyPages{} } r := &columnPages{ pages: make([]filePages, len(c.file.rowGroups)), } for i := range r.pages { r.pages[i].init(c.file.rowGroups[i].(*fileRowGroup).columns[c.index].(*fileColumnChunk)) } return r } type columnPages struct { pages []filePages index int } func (c *columnPages) ReadPage() (Page, error) { for { if c.index >= len(c.pages) { return nil, io.EOF } p, err := c.pages[c.index].ReadPage() if err == nil || err != io.EOF { return p, err } c.index++ } } func (c *columnPages) SeekToRow(rowIndex int64) error { c.index = 0 for c.index < len(c.pages) && c.pages[c.index].chunk.rowGroup.NumRows >= rowIndex { rowIndex -= c.pages[c.index].chunk.rowGroup.NumRows c.index++ } if c.index < len(c.pages) { if err := c.pages[c.index].SeekToRow(rowIndex); err != nil { return err } for i := range c.pages[c.index:] { p := &c.pages[c.index+i] if err := p.SeekToRow(0); err != nil { return err } } } return nil } func (c *columnPages) Close() error { var lastErr error for i := range c.pages { if err := c.pages[i].Close(); err != nil { lastErr = err } } c.pages = nil c.index = 0 return lastErr } // Depth returns the position of the column relative to the root. func (c *Column) Depth() int { return int(c.depth) } // MaxRepetitionLevel returns the maximum value of repetition levels on this // column. func (c *Column) MaxRepetitionLevel() int { return int(c.maxRepetitionLevel) } // MaxDefinitionLevel returns the maximum value of definition levels on this // column. func (c *Column) MaxDefinitionLevel() int { return int(c.maxDefinitionLevel) } // Index returns the position of the column in a row. Only leaf columns have a // column index, the method returns -1 when called on non-leaf columns. func (c *Column) Index() int { return int(c.index) } // GoType returns the Go type that best represents the parquet column. 
func (c *Column) GoType() reflect.Type { return goTypeOf(c) } // Value returns the sub-value in base for the child column at the given // index. func (c *Column) Value(base reflect.Value) reflect.Value { return base.MapIndex(reflect.ValueOf(&c.schema.Name).Elem()) } // String returns a human-readable string representation of the column. func (c *Column) String() string { return c.path.String() + ": " + sprint(c.Name(), c) } func (c *Column) forEachLeaf(do func(*Column)) { if len(c.columns) == 0 { do(c) } else { for _, child := range c.columns { child.forEachLeaf(do) } } } func openColumns(file *File) (*Column, error) { cl := columnLoader{} c, err := cl.open(file, nil) if err != nil { return nil, err } // Validate that there aren't extra entries in the row group columns, // which would otherwise indicate that there are dangling data pages // in the file. for index, rowGroup := range file.metadata.RowGroups { if cl.rowGroupColumnIndex != len(rowGroup.Columns) { return nil, fmt.Errorf("row group at index %d contains %d columns but %d were referenced by the column schemas", index, len(rowGroup.Columns), cl.rowGroupColumnIndex) } } _, err = c.setLevels(0, 0, 0, 0) return c, err } func (c *Column) setLevels(depth, repetition, definition, index int) (int, error) { if depth > MaxColumnDepth { return -1, fmt.Errorf("cannot represent parquet columns with more than %d nested levels: %s", MaxColumnDepth, c.path) } if index > MaxColumnIndex { return -1, fmt.Errorf("cannot represent parquet rows with more than %d columns: %s", MaxColumnIndex, c.path) } if repetition > MaxRepetitionLevel { return -1, fmt.Errorf("cannot represent parquet columns with more than %d repetition levels: %s", MaxRepetitionLevel, c.path) } if definition > MaxDefinitionLevel { return -1, fmt.Errorf("cannot represent parquet columns with more than %d definition levels: %s", MaxDefinitionLevel, c.path) } switch schemaRepetitionTypeOf(c.schema) { case format.Optional: definition++ case format.Repeated: 
repetition++ definition++ } c.depth = int8(depth) c.maxRepetitionLevel = byte(repetition) c.maxDefinitionLevel = byte(definition) depth++ if len(c.columns) > 0 { c.index = -1 } else { c.index = int16(index) index++ } var err error for _, child := range c.columns { if index, err = child.setLevels(depth, repetition, definition, index); err != nil { return -1, err } } return index, nil } type columnLoader struct { schemaIndex int columnOrderIndex int rowGroupColumnIndex int } func (cl *columnLoader) open(file *File, path []string) (*Column, error) { c := &Column{ file: file, schema: &file.metadata.Schema[cl.schemaIndex], } c.path = columnPath(path).append(c.schema.Name) cl.schemaIndex++ numChildren := int(c.schema.NumChildren) if numChildren == 0 { c.typ = schemaElementTypeOf(c.schema) if cl.columnOrderIndex < len(file.metadata.ColumnOrders) { c.order = &file.metadata.ColumnOrders[cl.columnOrderIndex] cl.columnOrderIndex++ } rowGroups := file.metadata.RowGroups rowGroupColumnIndex := cl.rowGroupColumnIndex cl.rowGroupColumnIndex++ c.chunks = make([]*format.ColumnChunk, 0, len(rowGroups)) c.columnIndex = make([]*format.ColumnIndex, 0, len(rowGroups)) c.offsetIndex = make([]*format.OffsetIndex, 0, len(rowGroups)) for i, rowGroup := range rowGroups { if rowGroupColumnIndex >= len(rowGroup.Columns) { return nil, fmt.Errorf("row group at index %d does not have enough columns", i) } c.chunks = append(c.chunks, &rowGroup.Columns[rowGroupColumnIndex]) } if len(file.columnIndexes) > 0 { for i := range rowGroups { if rowGroupColumnIndex >= len(file.columnIndexes) { return nil, fmt.Errorf("row group at index %d does not have enough column index pages", i) } c.columnIndex = append(c.columnIndex, &file.columnIndexes[rowGroupColumnIndex]) } } if len(file.offsetIndexes) > 0 { for i := range rowGroups { if rowGroupColumnIndex >= len(file.offsetIndexes) { return nil, fmt.Errorf("row group at index %d does not have enough offset index pages", i) } c.offsetIndex = append(c.offsetIndex, 
&file.offsetIndexes[rowGroupColumnIndex]) } } if len(c.chunks) > 0 { // Pick the encoding and compression codec of the first chunk. // // Technically each column chunk may use a different compression // codec, and each page of the column chunk might have a different // encoding. Exposing these details does not provide a lot of value // to the end user. // // Programs that wish to determine the encoding and compression of // each page of the column should iterate through the pages and read // the page headers to determine which compression and encodings are // applied. for _, encoding := range c.chunks[0].MetaData.Encoding { if c.encoding == nil { c.encoding = LookupEncoding(encoding) } if encoding != format.Plain && encoding != format.RLE { c.encoding = LookupEncoding(encoding) break } } c.compression = LookupCompressionCodec(c.chunks[0].MetaData.Codec) } return c, nil } c.typ = &groupType{} c.columns = make([]*Column, numChildren) for i := range c.columns { if cl.schemaIndex >= len(file.metadata.Schema) { return nil, fmt.Errorf("column %q has more children than there are schemas in the file: %d > %d", c.schema.Name, cl.schemaIndex+1, len(file.metadata.Schema)) } var err error c.columns[i], err = cl.open(file, c.path) if err != nil { return nil, fmt.Errorf("%s: %w", c.schema.Name, err) } } return c, nil } func schemaElementTypeOf(s *format.SchemaElement) Type { if lt := s.LogicalType; lt != nil { // A logical type exists, the Type interface implementations in this // package are all based on the logical parquet types declared in the // format sub-package so we can return them directly via a pointer type // conversion. switch { case lt.UTF8 != nil: return (*stringType)(lt.UTF8) case lt.Map != nil: return (*mapType)(lt.Map) case lt.List != nil: return (*listType)(lt.List) case lt.Enum != nil: return (*enumType)(lt.Enum) case lt.Decimal != nil: // A parquet decimal can be one of several different physical types. 
if t := s.Type; t != nil { var typ Type switch kind := Kind(*s.Type); kind { case Int32: typ = Int32Type case Int64: typ = Int64Type case FixedLenByteArray: if s.TypeLength == nil { panic("DECIMAL using FIXED_LEN_BYTE_ARRAY must specify a length") } typ = FixedLenByteArrayType(int(*s.TypeLength)) default: panic("DECIMAL must be of type INT32, INT64, or FIXED_LEN_BYTE_ARRAY but got " + kind.String()) } return &decimalType{ decimal: *lt.Decimal, Type: typ, } } case lt.Date != nil: return (*dateType)(lt.Date) case lt.Time != nil: return (*timeType)(lt.Time) case lt.Timestamp != nil: return (*timestampType)(lt.Timestamp) case lt.Integer != nil: return (*intType)(lt.Integer) case lt.Unknown != nil: return (*nullType)(lt.Unknown) case lt.Json != nil: return (*jsonType)(lt.Json) case lt.Bson != nil: return (*bsonType)(lt.Bson) case lt.UUID != nil: return (*uuidType)(lt.UUID) } } if ct := s.ConvertedType; ct != nil { // This column contains no logical type but has a converted type, it // was likely created by an older parquet writer. Convert the legacy // type representation to the equivalent logical parquet type. switch *ct { case deprecated.UTF8: return &stringType{} case deprecated.Map: return &mapType{} case deprecated.MapKeyValue: return &groupType{} case deprecated.List: return &listType{} case deprecated.Enum: return &enumType{} case deprecated.Decimal: if s.Scale != nil && s.Precision != nil { // A parquet decimal can be one of several different physical types. 
if t := s.Type; t != nil { var typ Type switch kind := Kind(*s.Type); kind { case Int32: typ = Int32Type case Int64: typ = Int64Type case FixedLenByteArray: if s.TypeLength == nil { panic("DECIMAL using FIXED_LEN_BYTE_ARRAY must specify a length") } typ = FixedLenByteArrayType(int(*s.TypeLength)) case ByteArray: typ = ByteArrayType default: panic("DECIMAL must be of type INT32, INT64, BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY but got " + kind.String()) } return &decimalType{ decimal: format.DecimalType{ Scale: *s.Scale, Precision: *s.Precision, }, Type: typ, } } } case deprecated.Date: return &dateType{} case deprecated.TimeMillis: return &timeType{IsAdjustedToUTC: true, Unit: Millisecond.TimeUnit()} case deprecated.TimeMicros: return &timeType{IsAdjustedToUTC: true, Unit: Microsecond.TimeUnit()} case deprecated.TimestampMillis: return ×tampType{IsAdjustedToUTC: true, Unit: Millisecond.TimeUnit()} case deprecated.TimestampMicros: return ×tampType{IsAdjustedToUTC: true, Unit: Microsecond.TimeUnit()} case deprecated.Uint8: return &unsignedIntTypes[0] case deprecated.Uint16: return &unsignedIntTypes[1] case deprecated.Uint32: return &unsignedIntTypes[2] case deprecated.Uint64: return &unsignedIntTypes[3] case deprecated.Int8: return &signedIntTypes[0] case deprecated.Int16: return &signedIntTypes[1] case deprecated.Int32: return &signedIntTypes[2] case deprecated.Int64: return &signedIntTypes[3] case deprecated.Json: return &jsonType{} case deprecated.Bson: return &bsonType{} case deprecated.Interval: // TODO } } if t := s.Type; t != nil { // The column only has a physical type, convert it to one of the // primitive types supported by this package. 
switch kind := Kind(*t); kind { case Boolean: return BooleanType case Int32: return Int32Type case Int64: return Int64Type case Int96: return Int96Type case Float: return FloatType case Double: return DoubleType case ByteArray: return ByteArrayType case FixedLenByteArray: if s.TypeLength != nil { return FixedLenByteArrayType(int(*s.TypeLength)) } } } // If we reach this point, we are likely reading a parquet column that was // written with a non-standard type or is in a newer version of the format // than this package supports. return &nullType{} } func schemaRepetitionTypeOf(s *format.SchemaElement) format.FieldRepetitionType { if s.RepetitionType != nil { return *s.RepetitionType } return format.Required } func (c *Column) decompress(compressedPageData []byte, uncompressedPageSize int32) (page *buffer, err error) { page = buffers.get(int(uncompressedPageSize)) page.data, err = c.compression.Decode(page.data, compressedPageData) if err != nil { page.unref() page = nil } return page, err } // DecodeDataPageV1 decodes a data page from the header, compressed data, and // optional dictionary passed as arguments. 
func (c *Column) DecodeDataPageV1(header DataPageHeaderV1, page []byte, dict Dictionary) (Page, error) {
	// The -1 size indicates that the uncompressed page size is not known to
	// the caller; see decompress for how the buffer is sized.
	return c.decodeDataPageV1(header, &buffer{data: page}, dict, -1)
}

// decodeDataPageV1 decodes a v1 data page: the page is optionally
// decompressed as a whole (v1 pages compress levels and values together),
// the repetition and definition levels that prefix the values are decoded,
// then the remaining bytes are handed to decodeDataPage.
func (c *Column) decodeDataPageV1(header DataPageHeaderV1, page *buffer, dict Dictionary, size int32) (Page, error) {
	var pageData = page.data
	var err error

	if isCompressed(c.compression) {
		if page, err = c.decompress(pageData, size); err != nil {
			return nil, fmt.Errorf("decompressing data page v1: %w", err)
		}
		defer page.unref()
		pageData = page.data
	}

	var numValues = int(header.NumValues())
	var repetitionLevels *buffer
	var definitionLevels *buffer

	// Repetition levels are only present when the column is (or is nested
	// within) a repeated column.
	if c.maxRepetitionLevel > 0 {
		encoding := lookupLevelEncoding(header.RepetitionLevelEncoding(), c.maxRepetitionLevel)
		repetitionLevels, pageData, err = decodeLevelsV1(encoding, numValues, pageData)
		if err != nil {
			return nil, fmt.Errorf("decoding repetition levels of data page v1: %w", err)
		}
		defer repetitionLevels.unref()
	}

	if c.maxDefinitionLevel > 0 {
		encoding := lookupLevelEncoding(header.DefinitionLevelEncoding(), c.maxDefinitionLevel)
		definitionLevels, pageData, err = decodeLevelsV1(encoding, numValues, pageData)
		if err != nil {
			return nil, fmt.Errorf("decoding definition levels of data page v1: %w", err)
		}
		defer definitionLevels.unref()

		// Data pages v1 did not embed the number of null values,
		// so we have to compute it from the definition levels.
		numValues -= countLevelsNotEqual(definitionLevels.data, c.maxDefinitionLevel)
	}

	return c.decodeDataPage(header, numValues, repetitionLevels, definitionLevels, page, pageData, dict)
}

// DecodeDataPageV2 decodes a data page from the header, compressed data, and
// optional dictionary passed as arguments.
func (c *Column) DecodeDataPageV2(header DataPageHeaderV2, page []byte, dict Dictionary) (Page, error) { return c.decodeDataPageV2(header, &buffer{data: page}, dict, -1) } func (c *Column) decodeDataPageV2(header DataPageHeaderV2, page *buffer, dict Dictionary, size int32) (Page, error) { var numValues = int(header.NumValues()) var pageData = page.data var err error var repetitionLevels *buffer var definitionLevels *buffer if length := header.RepetitionLevelsByteLength(); length > 0 { if c.maxRepetitionLevel == 0 { // In some cases we've observed files which have a non-zero // repetition level despite the column not being repeated // (nor nested within a repeated column). // // See https://github.com/apache/parquet-testing/pull/24 pageData, err = skipLevelsV2(pageData, length) } else { encoding := lookupLevelEncoding(header.RepetitionLevelEncoding(), c.maxRepetitionLevel) repetitionLevels, pageData, err = decodeLevelsV2(encoding, numValues, pageData, length) } if err != nil { return nil, fmt.Errorf("decoding repetition levels of data page v2: %w", io.ErrUnexpectedEOF) } if repetitionLevels != nil { defer repetitionLevels.unref() } } if length := header.DefinitionLevelsByteLength(); length > 0 { if c.maxDefinitionLevel == 0 { pageData, err = skipLevelsV2(pageData, length) } else { encoding := lookupLevelEncoding(header.DefinitionLevelEncoding(), c.maxDefinitionLevel) definitionLevels, pageData, err = decodeLevelsV2(encoding, numValues, pageData, length) } if err != nil { return nil, fmt.Errorf("decoding definition levels of data page v2: %w", io.ErrUnexpectedEOF) } if definitionLevels != nil { defer definitionLevels.unref() } } if isCompressed(c.compression) && header.IsCompressed() { if page, err = c.decompress(pageData, size); err != nil { return nil, fmt.Errorf("decompressing data page v2: %w", err) } defer page.unref() pageData = page.data } numValues -= int(header.NumNulls()) return c.decodeDataPage(header, numValues, repetitionLevels, definitionLevels, page, 
pageData, dict) } func (c *Column) decodeDataPage(header DataPageHeader, numValues int, repetitionLevels, definitionLevels, page *buffer, data []byte, dict Dictionary) (Page, error) { pageEncoding := LookupEncoding(header.Encoding()) pageType := c.Type() if isDictionaryEncoding(pageEncoding) { // In some legacy configurations, the PLAIN_DICTIONARY encoding is used // on data page headers to indicate that the page contains indexes into // the dictionary page, but the page is still encoded using the RLE // encoding in this case, so we convert it to RLE_DICTIONARY. pageEncoding = &RLEDictionary pageType = indexedPageType{newIndexedType(pageType, dict)} } var vbuf, obuf *buffer var pageValues []byte var pageOffsets []uint32 if pageEncoding.CanDecodeInPlace() { vbuf = page pageValues = data } else { vbuf = buffers.get(pageType.EstimateDecodeSize(numValues, data, pageEncoding)) defer vbuf.unref() pageValues = vbuf.data } // Page offsets not needed when dictionary-encoded if pageType.Kind() == ByteArray && !isDictionaryEncoding(pageEncoding) { obuf = buffers.get(4 * (numValues + 1)) defer obuf.unref() pageOffsets = unsafecast.BytesToUint32(obuf.data) } values := pageType.NewValues(pageValues, pageOffsets) values, err := pageType.Decode(values, data, pageEncoding) if err != nil { return nil, err } newPage := pageType.NewPage(c.Index(), numValues, values) switch { case c.maxRepetitionLevel > 0: newPage = newRepeatedPage( newPage, c.maxRepetitionLevel, c.maxDefinitionLevel, repetitionLevels.data, definitionLevels.data, ) case c.maxDefinitionLevel > 0: newPage = newOptionalPage( newPage, c.maxDefinitionLevel, definitionLevels.data, ) } return newBufferedPage(newPage, vbuf, obuf, repetitionLevels, definitionLevels), nil } func decodeLevelsV1(enc encoding.Encoding, numValues int, data []byte) (*buffer, []byte, error) { if len(data) < 4 { return nil, data, io.ErrUnexpectedEOF } i := 4 j := 4 + int(binary.LittleEndian.Uint32(data)) if j > len(data) { return nil, data, 
io.ErrUnexpectedEOF } levels, err := decodeLevels(enc, numValues, data[i:j]) return levels, data[j:], err } func decodeLevelsV2(enc encoding.Encoding, numValues int, data []byte, length int64) (*buffer, []byte, error) { levels, err := decodeLevels(enc, numValues, data[:length]) return levels, data[length:], err } func decodeLevels(enc encoding.Encoding, numValues int, data []byte) (levels *buffer, err error) { levels = buffers.get(numValues) levels.data, err = enc.DecodeLevels(levels.data, data) if err != nil { levels.unref() levels = nil } else { switch { case len(levels.data) < numValues: err = fmt.Errorf("decoding level expected %d values but got only %d", numValues, len(levels.data)) case len(levels.data) > numValues: levels.data = levels.data[:numValues] } } return levels, err } func skipLevelsV2(data []byte, length int64) ([]byte, error) { if length >= int64(len(data)) { return data, io.ErrUnexpectedEOF } return data[length:], nil } // DecodeDictionary decodes a data page from the header and compressed data // passed as arguments. 
func (c *Column) DecodeDictionary(header DictionaryPageHeader, page []byte) (Dictionary, error) {
	// The -1 size indicates that the uncompressed page size is not known to
	// the caller; see decompress for how the buffer is sized.
	return c.decodeDictionary(header, &buffer{data: page}, -1)
}

// decodeDictionary decompresses (when the column is compressed) and decodes
// a dictionary page into a Dictionary for this column's type.
func (c *Column) decodeDictionary(header DictionaryPageHeader, page *buffer, size int32) (Dictionary, error) {
	pageData := page.data

	if isCompressed(c.compression) {
		var err error
		if page, err = c.decompress(pageData, size); err != nil {
			return nil, fmt.Errorf("decompressing dictionary page: %w", err)
		}
		defer page.unref()
		pageData = page.data
	}

	pageType := c.Type()
	pageEncoding := header.Encoding()
	// Legacy writers label dictionary pages PLAIN_DICTIONARY even though the
	// page payload itself is PLAIN encoded.
	if pageEncoding == format.PlainDictionary {
		pageEncoding = format.Plain
	}

	numValues := int(header.NumValues())
	values := pageType.NewValues(nil, nil)
	values, err := pageType.Decode(values, pageData, LookupEncoding(pageEncoding))
	if err != nil {
		return nil, err
	}
	return pageType.NewDictionary(int(c.index), numValues, values), nil
}

// Compile-time check that *Column satisfies the Node interface.
var (
	_ Node = (*Column)(nil)
)

================================================
FILE: column_buffer.go
================================================
package parquet

import (
	"bytes"
	"fmt"
	"io"
	"sort"
	"unsafe"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding/plain"
	"github.com/segmentio/parquet-go/internal/bitpack"
	"github.com/segmentio/parquet-go/internal/unsafecast"
	"github.com/segmentio/parquet-go/sparse"
)

// ColumnBuffer is an interface representing columns of a row group.
//
// ColumnBuffer implements sort.Interface as a way to support reordering the
// rows that have been written to it.
//
// The current implementation has a limitation which prevents applications from
// providing custom versions of this interface because it contains unexported
// methods. The only way to create ColumnBuffer values is to call the
// NewColumnBuffer of Type instances. This limitation may be lifted in future
// releases.
type ColumnBuffer interface {
	// Exposes a read-only view of the column buffer.
	ColumnChunk

	// The column implements ValueReaderAt as a mechanism to read values at
	// specific locations within the buffer.
	ValueReaderAt

	// The column implements ValueWriter as a mechanism to optimize the copy
	// of values into the buffer in contexts where the row information is
	// provided by the values because the repetition and definition levels
	// are set.
	ValueWriter

	// For indexed columns, returns the underlying dictionary holding the column
	// values. If the column is not indexed, nil is returned.
	Dictionary() Dictionary

	// Returns a copy of the column. The returned copy shares no memory with
	// the original, mutations of either column will not modify the other.
	Clone() ColumnBuffer

	// Returns the column as a Page.
	Page() Page

	// Clears all rows written to the column.
	Reset()

	// Returns the current capacity of the column (rows).
	Cap() int

	// Returns the number of rows currently written to the column.
	Len() int

	// Compares rows at index i and j and reports whether i < j.
	Less(i, j int) bool

	// Swaps rows at index i and j.
	Swap(i, j int)

	// Returns the size of the column buffer in bytes.
	Size() int64

	// This method is employed to write rows from arrays of Go values into the
	// column buffer. The method is currently unexported because it uses unsafe
	// APIs which would be difficult for applications to leverage, increasing
	// the risk of introducing bugs in the code. As a consequence, applications
	// cannot use custom implementations of the ColumnBuffer interface since
	// they cannot declare an unexported method that would match this signature.
	// It means that in order to create a ColumnBuffer value, programs need to
	// go through a call to NewColumnBuffer on a Type instance. We make this
	// trade off for now as it is preferrable to optimize for safety over
	// extensibility in the public APIs, we might revisit in the future if we
	// learn about valid use cases for custom column buffer types.
	writeValues(rows sparse.Array, levels columnLevels)
}

// columnLevels carries the repetition/definition level state of the row being
// written while the schema is traversed.
type columnLevels struct {
	repetitionDepth byte
	repetitionLevel byte
	definitionLevel byte
}

// columnIndexOfNullable wraps the column index of base so that null pages and
// null counts are derived from the definition levels tracked by the nullable
// wrapper, which the base buffer does not know about.
func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitionLevels []byte) ColumnIndex {
	return &nullableColumnIndex{
		ColumnIndex:        base.ColumnIndex(),
		maxDefinitionLevel: maxDefinitionLevel,
		definitionLevels:   definitionLevels,
	}
}

type nullableColumnIndex struct {
	ColumnIndex
	maxDefinitionLevel byte
	definitionLevels   []byte
}

// NullPage reports whether the page contains only nulls. Column buffers hold
// a single page, so the page index i is effectively ignored.
func (index *nullableColumnIndex) NullPage(i int) bool {
	return index.NullCount(i) == int64(len(index.definitionLevels))
}

// NullCount returns the number of null values, counted as definition levels
// below the maximum. The page index i is ignored for the same reason as in
// NullPage.
func (index *nullableColumnIndex) NullCount(i int) int64 {
	return int64(countLevelsNotEqual(index.definitionLevels, index.maxDefinitionLevel))
}

// nullOrdering compares two rows of a nullable column given their indexes in
// the base buffer and their definition levels; a level below the maximum
// denotes a null row.
type nullOrdering func(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool

// nullsGoFirst orders null rows before all non-null rows.
func nullsGoFirst(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
	if definitionLevel1 != maxDefinitionLevel {
		// Row i is null: it sorts before j only if j is non-null.
		return definitionLevel2 == maxDefinitionLevel
	} else {
		// Row i is non-null: both must be non-null for the base ordering to
		// apply.
		return definitionLevel2 == maxDefinitionLevel && column.Less(i, j)
	}
}

// nullsGoLast orders null rows after all non-null rows.
func nullsGoLast(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
	return definitionLevel1 == maxDefinitionLevel && (definitionLevel2 != maxDefinitionLevel || column.Less(i, j))
}

// reversedColumnBuffer is an adapter of ColumnBuffer which inverses the order
// in which rows are ordered when the column gets sorted.
//
// This type is used when buffers are constructed with sorting columns ordering
// values in descending order.
type reversedColumnBuffer struct{ ColumnBuffer }

func (col *reversedColumnBuffer) Less(i, j int) bool { return col.ColumnBuffer.Less(j, i) }

// optionalColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// definition levels.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max definition level and a zero repetition level, which may be because the
// column or one of its parent(s) are marked optional.
type optionalColumnBuffer struct {
	base               ColumnBuffer // holds the non-null values only
	reordered          bool         // set by Swap; cleared when Page materializes the order
	maxDefinitionLevel byte
	rows               []int32 // per-row offset into base, or -1 for null rows
	sortIndex          []int32 // scratch space for the cyclic sort in Page
	definitionLevels   []byte  // one definition level per row
	nullOrdering       nullOrdering
}

// newOptionalColumnBuffer wraps base to track definition levels for an
// optional leaf column.
func newOptionalColumnBuffer(base ColumnBuffer, maxDefinitionLevel byte, nullOrdering nullOrdering) *optionalColumnBuffer {
	n := base.Cap()
	return &optionalColumnBuffer{
		base:               base,
		maxDefinitionLevel: maxDefinitionLevel,
		rows:               make([]int32, 0, n),
		definitionLevels:   make([]byte, 0, n),
		nullOrdering:       nullOrdering,
	}
}

// Clone returns a deep copy sharing no memory with col; sortIndex is scratch
// space and intentionally not copied.
func (col *optionalColumnBuffer) Clone() ColumnBuffer {
	return &optionalColumnBuffer{
		base:               col.base.Clone(),
		reordered:          col.reordered,
		maxDefinitionLevel: col.maxDefinitionLevel,
		rows:               append([]int32{}, col.rows...),
		definitionLevels:   append([]byte{}, col.definitionLevels...),
		nullOrdering:       col.nullOrdering,
	}
}

func (col *optionalColumnBuffer) Type() Type { return col.base.Type() }

// NumValues counts logical values, including nulls (one definition level is
// recorded per row, null or not).
func (col *optionalColumnBuffer) NumValues() int64 { return int64(len(col.definitionLevels)) }

func (col *optionalColumnBuffer) ColumnIndex() ColumnIndex {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

func (col *optionalColumnBuffer) OffsetIndex() OffsetIndex { return col.base.OffsetIndex() }

func (col *optionalColumnBuffer) BloomFilter() BloomFilter { return col.base.BloomFilter() }

func (col *optionalColumnBuffer) Dictionary() Dictionary { return col.base.Dictionary() }

func (col *optionalColumnBuffer) Column() int { return col.base.Column() }

func (col *optionalColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *optionalColumnBuffer) Page() Page {
	// No need for any cyclic sorting if the rows have not been reordered.
	// This case is also important because the cyclic sorting modifies the
	// buffer which makes it unsafe to read the buffer concurrently.
	if col.reordered {
		numNulls := countLevelsNotEqual(col.definitionLevels, col.maxDefinitionLevel)
		numValues := len(col.rows) - numNulls

		if numValues > 0 {
			if cap(col.sortIndex) < numValues {
				col.sortIndex = make([]int32, numValues)
			}
			sortIndex := col.sortIndex[:numValues]
			i := 0
			// Build the permutation mapping base-buffer positions to their
			// destination in the sorted row order, skipping null rows (r < 0).
			for _, j := range col.rows {
				if j >= 0 {
					sortIndex[j] = int32(i)
					i++
				}
			}

			// Cyclic sort: O(N)
			for i := range sortIndex {
				for j := int(sortIndex[i]); i != j; j = int(sortIndex[i]) {
					col.base.Swap(i, j)
					sortIndex[i], sortIndex[j] = sortIndex[j], sortIndex[i]
				}
			}
		}

		// Re-establish the identity mapping now that the base buffer is
		// physically ordered.
		// NOTE(review): this writes col.rows[i] (the count of non-null rows
		// seen so far), not col.rows[n] (the row's position); when null rows
		// precede non-null rows this overwrites -1 markers — confirm the
		// intended invariant against the null ordering in use.
		i := 0
		for _, r := range col.rows {
			if r >= 0 {
				col.rows[i] = int32(i)
				i++
			}
		}

		col.reordered = false
	}

	return newOptionalPage(col.base.Page(), col.maxDefinitionLevel, col.definitionLevels)
}

func (col *optionalColumnBuffer) Reset() {
	col.base.Reset()
	col.rows = col.rows[:0]
	col.definitionLevels = col.definitionLevels[:0]
}

// Size accounts for the 4-byte int32 rows and sortIndex entries plus the
// definition levels and the base buffer.
func (col *optionalColumnBuffer) Size() int64 {
	return int64(4*len(col.rows)+4*len(col.sortIndex)+len(col.definitionLevels)) + col.base.Size()
}

func (col *optionalColumnBuffer) Cap() int { return cap(col.rows) }

func (col *optionalColumnBuffer) Len() int { return len(col.rows) }

// Less delegates to the configured null ordering, translating row indexes to
// base-buffer offsets.
func (col *optionalColumnBuffer) Less(i, j int) bool {
	return col.nullOrdering(
		col.base,
		int(col.rows[i]),
		int(col.rows[j]),
		col.maxDefinitionLevel,
		col.definitionLevels[i],
		col.definitionLevels[j],
	)
}

func (col *optionalColumnBuffer) Swap(i, j int) {
	// Because the underlying column does not contain null values, we cannot
	// swap its values at indexes i and j. We swap the row indexes only, then
	// reorder the underlying buffer using a cyclic sort when the buffer is
	// materialized into a page view.
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
	col.definitionLevels[i], col.definitionLevels[j] = col.definitionLevels[j], col.definitionLevels[i]
}

// WriteValues appends values to the buffer, splitting the input into runs of
// null and non-null values; only non-null values reach the base buffer.
func (col *optionalColumnBuffer) WriteValues(values []Value) (n int, err error) {
	rowIndex := int32(col.base.Len())

	for n < len(values) {
		// Collect index range of contiguous null values, from i to n. If this
		// for loop exhausts the values, all remaining if statements and for
		// loops will be no-ops and the loop will terminate.
		i := n
		for n < len(values) && values[n].definitionLevel != col.maxDefinitionLevel {
			n++
		}

		// Write the contiguous null values up until the first non-null value
		// obtained in the for loop above.
		for _, v := range values[i:n] {
			col.rows = append(col.rows, -1)
			col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
		}

		// Collect index range of contiguous non-null values, from i to n.
		i = n
		for n < len(values) && values[n].definitionLevel == col.maxDefinitionLevel {
			n++
		}

		// As long as i < n we have non-null values still to write. It is
		// possible that we just exhausted the input values in which case i == n
		// and the outer for loop will terminate.
		if i < n {
			count, err := col.base.WriteValues(values[i:n])
			col.definitionLevels = appendLevel(col.definitionLevels, col.maxDefinitionLevel, count)

			for count > 0 {
				col.rows = append(col.rows, rowIndex)
				rowIndex++
				count--
			}

			if err != nil {
				return n, err
			}
		}
	}
	return n, nil
}

func (col *optionalColumnBuffer) writeValues(rows sparse.Array, levels columnLevels) {
	// The row count is zero when writing an null optional value, in which case
	// we still need to output a row to the buffer to record the definition
	// level.
	if rows.Len() == 0 {
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		col.rows = append(col.rows, -1)
		return
	}

	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, rows.Len())

	i := len(col.rows)
	j := len(col.rows) + rows.Len()

	// Grow col.rows to length j, doubling the capacity when reallocating.
	if j <= cap(col.rows) {
		col.rows = col.rows[:j]
	} else {
		tmp := make([]int32, j, 2*j)
		copy(tmp, col.rows)
		col.rows = tmp
	}

	if levels.definitionLevel != col.maxDefinitionLevel {
		// Null rows: record -1 offsets, nothing is written to the base.
		broadcastValueInt32(col.rows[i:], -1)
	} else {
		// Non-null rows: record consecutive offsets into the base buffer,
		// then forward the values to it.
		broadcastRangeInt32(col.rows[i:], int32(col.base.Len()))
		col.base.writeValues(rows, levels)
	}
}

// ReadValuesAt reads up to len(values) logical values starting at the given
// row offset, re-inserting null values at their logical positions from the
// definition levels (the base buffer stores non-null values only).
func (col *optionalColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
	length := int64(len(col.definitionLevels))
	if offset < 0 {
		return 0, errRowIndexOutOfBounds(offset, length)
	}
	if offset >= length {
		return 0, io.EOF
	}
	if length -= offset; length < int64(len(values)) {
		values = values[:length]
	}

	// Nulls before the offset shift the base-buffer position; nulls within
	// the window reduce how many values are read from the base.
	numNulls1 := int64(countLevelsNotEqual(col.definitionLevels[:offset], col.maxDefinitionLevel))
	numNulls2 := int64(countLevelsNotEqual(col.definitionLevels[offset:offset+length], col.maxDefinitionLevel))

	if numNulls2 < length {
		// Read the non-null values, compacted at the front of values.
		n, err := col.base.ReadValuesAt(values[:length-numNulls2], offset-numNulls1)
		if err != nil {
			return n, err
		}
	}

	if numNulls2 > 0 {
		// Spread the compacted non-null values to their logical positions,
		// filling gaps with null values; the backward scan ensures no value
		// is overwritten before it has been moved.
		// NOTE(review): i starts at numNulls2-1, but the last compacted
		// non-null value lives at index length-numNulls2-1 — confirm this
		// initialization against the backward-fill invariant.
		columnIndex := ^int16(col.Column())
		i := numNulls2 - 1
		j := length - 1
		definitionLevels := col.definitionLevels[offset : offset+length]
		maxDefinitionLevel := col.maxDefinitionLevel

		for n := len(definitionLevels) - 1; n >= 0 && j > i; n-- {
			if definitionLevels[n] != maxDefinitionLevel {
				values[j] = Value{definitionLevel: definitionLevels[n], columnIndex: columnIndex}
			} else {
				values[j] = values[i]
				i--
			}
			j--
		}
	}
	return int(length), nil
}

// repeatedColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// repetition levels, definition levels, and map rows to the region of the
// underlying buffer that contains their sequence of
values. // // Null values are not written to the underlying column; instead, the buffer // tracks offsets of row values in the column, null row values are represented // by the value -1 and a definition level less than the max. // // This column buffer type is used for all leaf columns that have a non-zero // max repetition level, which may be because the column or one of its parent(s) // are marked repeated. type repeatedColumnBuffer struct { base ColumnBuffer reordered bool maxRepetitionLevel byte maxDefinitionLevel byte rows []offsetMapping repetitionLevels []byte definitionLevels []byte buffer []Value reordering *repeatedColumnBuffer nullOrdering nullOrdering } // The offsetMapping type maps the logical offset of rows within the repetition // and definition levels, to the base offsets in the underlying column buffers // where the non-null values have been written. type offsetMapping struct { offset uint32 baseOffset uint32 } func newRepeatedColumnBuffer(base ColumnBuffer, maxRepetitionLevel, maxDefinitionLevel byte, nullOrdering nullOrdering) *repeatedColumnBuffer { n := base.Cap() return &repeatedColumnBuffer{ base: base, maxRepetitionLevel: maxRepetitionLevel, maxDefinitionLevel: maxDefinitionLevel, rows: make([]offsetMapping, 0, n/8), repetitionLevels: make([]byte, 0, n), definitionLevels: make([]byte, 0, n), nullOrdering: nullOrdering, } } func (col *repeatedColumnBuffer) Clone() ColumnBuffer { return &repeatedColumnBuffer{ base: col.base.Clone(), reordered: col.reordered, maxRepetitionLevel: col.maxRepetitionLevel, maxDefinitionLevel: col.maxDefinitionLevel, rows: append([]offsetMapping{}, col.rows...), repetitionLevels: append([]byte{}, col.repetitionLevels...), definitionLevels: append([]byte{}, col.definitionLevels...), nullOrdering: col.nullOrdering, } } func (col *repeatedColumnBuffer) Type() Type { return col.base.Type() } func (col *repeatedColumnBuffer) NumValues() int64 { return int64(len(col.definitionLevels)) } func (col *repeatedColumnBuffer) 
ColumnIndex() ColumnIndex { return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels) } func (col *repeatedColumnBuffer) OffsetIndex() OffsetIndex { return col.base.OffsetIndex() } func (col *repeatedColumnBuffer) BloomFilter() BloomFilter { return col.base.BloomFilter() } func (col *repeatedColumnBuffer) Dictionary() Dictionary { return col.base.Dictionary() } func (col *repeatedColumnBuffer) Column() int { return col.base.Column() } func (col *repeatedColumnBuffer) Pages() Pages { return onePage(col.Page()) } func (col *repeatedColumnBuffer) Page() Page { if col.reordered { if col.reordering == nil { col.reordering = col.Clone().(*repeatedColumnBuffer) } column := col.reordering column.Reset() maxNumValues := 0 defer func() { clearValues(col.buffer[:maxNumValues]) }() baseOffset := 0 for _, row := range col.rows { rowOffset := int(row.offset) rowLength := repeatedRowLength(col.repetitionLevels[rowOffset:]) numNulls := countLevelsNotEqual(col.definitionLevels[rowOffset:rowOffset+rowLength], col.maxDefinitionLevel) numValues := rowLength - numNulls if numValues > 0 { if numValues > cap(col.buffer) { col.buffer = make([]Value, numValues) } else { col.buffer = col.buffer[:numValues] } n, err := col.base.ReadValuesAt(col.buffer, int64(row.baseOffset)) if err != nil && n < numValues { return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err) } if _, err := column.base.WriteValues(col.buffer); err != nil { return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err) } if numValues > maxNumValues { maxNumValues = numValues } } column.rows = append(column.rows, offsetMapping{ offset: uint32(len(column.repetitionLevels)), baseOffset: uint32(baseOffset), }) column.repetitionLevels = append(column.repetitionLevels, col.repetitionLevels[rowOffset:rowOffset+rowLength]...) column.definitionLevels = append(column.definitionLevels, col.definitionLevels[rowOffset:rowOffset+rowLength]...) 
baseOffset += numValues } col.swapReorderingBuffer(column) col.reordered = false } return newRepeatedPage( col.base.Page(), col.maxRepetitionLevel, col.maxDefinitionLevel, col.repetitionLevels, col.definitionLevels, ) } func (col *repeatedColumnBuffer) swapReorderingBuffer(buf *repeatedColumnBuffer) { col.base, buf.base = buf.base, col.base col.rows, buf.rows = buf.rows, col.rows col.repetitionLevels, buf.repetitionLevels = buf.repetitionLevels, col.repetitionLevels col.definitionLevels, buf.definitionLevels = buf.definitionLevels, col.definitionLevels } func (col *repeatedColumnBuffer) Reset() { col.base.Reset() col.rows = col.rows[:0] col.repetitionLevels = col.repetitionLevels[:0] col.definitionLevels = col.definitionLevels[:0] } func (col *repeatedColumnBuffer) Size() int64 { return int64(8*len(col.rows)+len(col.repetitionLevels)+len(col.definitionLevels)) + col.base.Size() } func (col *repeatedColumnBuffer) Cap() int { return cap(col.rows) } func (col *repeatedColumnBuffer) Len() int { return len(col.rows) } func (col *repeatedColumnBuffer) Less(i, j int) bool { row1 := col.rows[i] row2 := col.rows[j] less := col.nullOrdering row1Length := repeatedRowLength(col.repetitionLevels[row1.offset:]) row2Length := repeatedRowLength(col.repetitionLevels[row2.offset:]) for k := 0; k < row1Length && k < row2Length; k++ { x := int(row1.baseOffset) y := int(row2.baseOffset) definitionLevel1 := col.definitionLevels[int(row1.offset)+k] definitionLevel2 := col.definitionLevels[int(row2.offset)+k] switch { case less(col.base, x, y, col.maxDefinitionLevel, definitionLevel1, definitionLevel2): return true case less(col.base, y, x, col.maxDefinitionLevel, definitionLevel2, definitionLevel1): return false } } return row1Length < row2Length } func (col *repeatedColumnBuffer) Swap(i, j int) { // Because the underlying column does not contain null values, and may hold // an arbitrary number of values per row, we cannot swap its values at // indexes i and j. 
	// We swap the row indexes only, then reorder the base
	// column buffer when its view is materialized into a page by creating a
	// copy and writing rows back to it following the order of rows in the
	// repeated column buffer.
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
}

// WriteValues appends values to the buffer, splitting the input into rows at
// each value carrying a repetition level of zero.
func (col *repeatedColumnBuffer) WriteValues(values []Value) (numValues int, err error) {
	maxRowLen := 0
	defer func() {
		// col.buffer is scratch space retained across calls; clear references
		// so written values do not keep memory alive.
		clearValues(col.buffer[:maxRowLen])
	}()

	for i := 0; i < len(values); {
		// Scan forward to the start of the next row (repetition level zero
		// marks a row boundary).
		j := i

		if values[j].repetitionLevel == 0 {
			j++
		}

		for j < len(values) && values[j].repetitionLevel != 0 {
			j++
		}

		if err := col.writeRow(values[i:j]); err != nil {
			return numValues, err
		}

		if len(col.buffer) > maxRowLen {
			maxRowLen = len(col.buffer)
		}

		numValues += j - i
		i = j
	}

	return numValues, nil
}

// writeRow writes one row: non-null values (definition level at max) go to the
// base column, levels are recorded for every value, and an offsetMapping entry
// is added when the row starts a new record.
func (col *repeatedColumnBuffer) writeRow(row []Value) error {
	col.buffer = col.buffer[:0]

	for _, v := range row {
		if v.definitionLevel == col.maxDefinitionLevel {
			col.buffer = append(col.buffer, v)
		}
	}

	// Capture the base offset before writing so the mapping points at the
	// first value of this row in the base column.
	baseOffset := col.base.NumValues()
	if len(col.buffer) > 0 {
		if _, err := col.base.WriteValues(col.buffer); err != nil {
			return err
		}
	}

	if row[0].repetitionLevel == 0 {
		col.rows = append(col.rows, offsetMapping{
			offset:     uint32(len(col.repetitionLevels)),
			baseOffset: uint32(baseOffset),
		})
	}

	for _, v := range row {
		col.repetitionLevels = append(col.repetitionLevels, v.repetitionLevel)
		col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
	}

	return nil
}

// writeValues is the sparse fast path: it records the levels for the row and
// forwards the values to the base column only when they are non-null.
func (col *repeatedColumnBuffer) writeValues(row sparse.Array, levels columnLevels) {
	if levels.repetitionLevel == 0 {
		col.rows = append(col.rows, offsetMapping{
			offset:     uint32(len(col.repetitionLevels)),
			baseOffset: uint32(col.base.NumValues()),
		})
	}

	if row.Len() == 0 {
		// Null entry: only levels are recorded, no base value is written.
		col.repetitionLevels = append(col.repetitionLevels, levels.repetitionLevel)
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		return
	}

	col.repetitionLevels = appendLevel(col.repetitionLevels, levels.repetitionLevel, row.Len())
	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, row.Len())

	if levels.definitionLevel == col.maxDefinitionLevel {
		col.base.writeValues(row, levels)
	}
}

// ReadValuesAt is not implemented for repeated columns.
func (col *repeatedColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
	// TODO:
	panic("NOT IMPLEMENTED")
}

// repeatedRowLength gives the length of the repeated row starting at the
// beginning of the repetitionLevels slice.
func repeatedRowLength(repetitionLevels []byte) int {
	// If a repetition level exists, at least one value is required to represent
	// the column.
	if len(repetitionLevels) > 0 {
		// The subsequent levels will represent the start of a new record when
		// they go back to zero.
		if i := bytes.IndexByte(repetitionLevels[1:], 0); i >= 0 {
			return i + 1
		}
	}
	return len(repetitionLevels)
}

// =============================================================================
// The types below are in-memory implementations of the ColumnBuffer interface
// for each parquet type.
//
// These column buffers are created by calling NewColumnBuffer on parquet.Type
// instances; each parquet type manages to construct column buffers of the
// appropriate type, which ensures that we are packing as many values as we
// can in memory.
//
// See Type.NewColumnBuffer for details about how these types get created.
// =============================================================================

// booleanColumnBuffer holds BOOLEAN values bit-packed in a booleanPage.
type booleanColumnBuffer struct{ booleanPage }

func newBooleanColumnBuffer(typ Type, columnIndex int16, numValues int32) *booleanColumnBuffer {
	// Boolean values are bit-packed, we can fit up to 8 values per byte.
	bufferSize := (numValues + 7) / 8
	return &booleanColumnBuffer{
		booleanPage: booleanPage{
			typ:         typ,
			bits:        make([]byte, 0, bufferSize),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *booleanColumnBuffer) Clone() ColumnBuffer {
	return &booleanColumnBuffer{
		booleanPage: booleanPage{
			typ:         col.typ,
			bits:        append([]byte{}, col.bits...),
			offset:      col.offset,
			numValues:   col.numValues,
			columnIndex: col.columnIndex,
		},
	}
}

func (col *booleanColumnBuffer) ColumnIndex() ColumnIndex { return booleanColumnIndex{&col.booleanPage} }

func (col *booleanColumnBuffer) OffsetIndex() OffsetIndex { return booleanOffsetIndex{&col.booleanPage} }

func (col *booleanColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *booleanColumnBuffer) Dictionary() Dictionary { return nil }

func (col *booleanColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *booleanColumnBuffer) Page() Page { return &col.booleanPage }

func (col *booleanColumnBuffer) Reset() {
	col.bits = col.bits[:0]
	col.offset = 0
	col.numValues = 0
}

// Cap reports capacity in values (8 per byte of bit storage).
func (col *booleanColumnBuffer) Cap() int { return 8 * cap(col.bits) }

func (col *booleanColumnBuffer) Len() int { return int(col.numValues) }

// Less orders false before true.
func (col *booleanColumnBuffer) Less(i, j int) bool {
	a := col.valueAt(i)
	b := col.valueAt(j)
	return a != b && !a
}

func (col *booleanColumnBuffer) valueAt(i int) bool {
	j := uint32(i) / 8
	k := uint32(i) % 8
	return ((col.bits[j] >> k) & 1) != 0
}

func (col *booleanColumnBuffer) setValueAt(i int, v bool) {
	// `offset` is always zero in the page of a column buffer
	j := uint32(i) / 8
	k := uint32(i) % 8
	x := byte(0)
	if v {
		x = 1
	}
	col.bits[j] = (col.bits[j] & ^(1 << k)) | (x << k)
}

func (col *booleanColumnBuffer) Swap(i, j int) {
	a := col.valueAt(i)
	b := col.valueAt(j)
	col.setValueAt(i, b)
	col.setValueAt(j, a)
}

func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) {
	col.writeValues(sparse.MakeBoolArray(values).UnsafeArray(), columnLevels{})
	return len(values), nil
}

func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

// writeValues bit-packs the low bit of each input byte into col.bits in three
// phases: align to a byte boundary, gather 8 bits at a time, then finish the
// remaining tail bit by bit.
func (col *booleanColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	numBytes := bitpack.ByteCount(uint(col.numValues) + uint(rows.Len()))
	if cap(col.bits) < numBytes {
		col.bits = append(make([]byte, 0, max(numBytes, 2*cap(col.bits))), col.bits...)
	}
	col.bits = col.bits[:numBytes]
	i := 0
	r := 8 - (int(col.numValues) % 8)
	bytes := rows.Uint8Array()

	if r <= bytes.Len() {
		// First we attempt to write enough bits to align the number of values
		// in the column buffer on 8 bytes. After this step the next bit should
		// be written at the zero'th index of a byte of the buffer.
		if r < 8 {
			var b byte
			for i < r {
				v := bytes.Index(i)
				b |= (v & 1) << uint(i)
				i++
			}
			x := uint(col.numValues) / 8
			y := uint(col.numValues) % 8
			col.bits[x] = (b << y) | (col.bits[x] & ^(0xFF << y))
			col.numValues += int32(i)
		}

		if n := ((bytes.Len() - i) / 8) * 8; n > 0 {
			// At this stage, we know that that we have at least 8 bits to write
			// and the bits will be aligned on the address of a byte in the
			// output buffer. We can work on 8 values per loop iteration,
			// packing them into a single byte and writing it to the output
			// buffer. This effectively reduces by 87.5% the number of memory
			// stores that the program needs to perform to generate the values.
			i += sparse.GatherBits(col.bits[col.numValues/8:], bytes.Slice(i, i+n))
			col.numValues += int32(n)
		}
	}

	// Tail: fewer than 8 values remain, write them one bit at a time.
	for i < bytes.Len() {
		x := uint(col.numValues) / 8
		y := uint(col.numValues) % 8
		b := bytes.Index(i)
		col.bits[x] = ((b & 1) << y) | (col.bits[x] & ^(1 << y))
		col.numValues++
		i++
	}

	col.bits = col.bits[:bitpack.ByteCount(uint(col.numValues))]
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *booleanColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(col.numValues))
	case i >= int(col.numValues):
		return 0, io.EOF
	default:
		for n < len(values) && i < int(col.numValues) {
			values[n] = col.makeValue(col.valueAt(i))
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// int32ColumnBuffer holds INT32 values in an int32Page.
type int32ColumnBuffer struct{ int32Page }

func newInt32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int32ColumnBuffer {
	return &int32ColumnBuffer{
		int32Page: int32Page{
			typ:         typ,
			values:      make([]int32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *int32ColumnBuffer) Clone() ColumnBuffer {
	return &int32ColumnBuffer{
		int32Page: int32Page{
			typ:         col.typ,
			values:      append([]int32{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *int32ColumnBuffer) ColumnIndex() ColumnIndex { return int32ColumnIndex{&col.int32Page} }

func (col *int32ColumnBuffer) OffsetIndex() OffsetIndex { return int32OffsetIndex{&col.int32Page} }

func (col *int32ColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *int32ColumnBuffer) Dictionary() Dictionary { return nil }

func (col *int32ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *int32ColumnBuffer) Page() Page { return &col.int32Page }

func (col *int32ColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *int32ColumnBuffer) Cap() int { return cap(col.values) }

func (col *int32ColumnBuffer) Len() int { return len(col.values) }

func (col *int32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

func (col
*int32ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

// Write appends raw little-endian INT32 values; the input length must be a
// multiple of 4 bytes.
func (col *int32ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 4) != 0 {
		return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToInt32(b)...)
	return len(b), nil
}

func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

// writeValues grows the backing slice (at least doubling) then gathers the
// sparse values directly into the newly extended region.
func (col *int32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]int32, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherInt32(col.values[n:], rows.Int32Array())
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *int32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// int64ColumnBuffer holds INT64 values in an int64Page.
type int64ColumnBuffer struct{ int64Page }

func newInt64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int64ColumnBuffer {
	return &int64ColumnBuffer{
		int64Page: int64Page{
			typ:         typ,
			values:      make([]int64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *int64ColumnBuffer) Clone() ColumnBuffer {
	return &int64ColumnBuffer{
		int64Page: int64Page{
			typ:         col.typ,
			values:      append([]int64{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *int64ColumnBuffer) ColumnIndex() ColumnIndex { return int64ColumnIndex{&col.int64Page} }

func (col *int64ColumnBuffer) OffsetIndex() OffsetIndex { return int64OffsetIndex{&col.int64Page} }

func (col *int64ColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *int64ColumnBuffer) Dictionary() Dictionary { return nil }

func (col *int64ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *int64ColumnBuffer) Page() Page { return &col.int64Page }

func (col *int64ColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *int64ColumnBuffer) Cap() int { return cap(col.values) }

func (col *int64ColumnBuffer) Len() int { return len(col.values) }

func (col *int64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

func (col *int64ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

// Write appends raw little-endian INT64 values; the input length must be a
// multiple of 8 bytes.
func (col *int64ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 8) != 0 {
		return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToInt64(b)...)
	return len(b), nil
}

func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

// writeValues grows the backing slice (at least doubling) then gathers the
// sparse values directly into the newly extended region.
func (col *int64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]int64, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherInt64(col.values[n:], rows.Int64Array())
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *int64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// int96ColumnBuffer holds deprecated INT96 values in an int96Page.
type int96ColumnBuffer struct{ int96Page }

func newInt96ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int96ColumnBuffer {
	return &int96ColumnBuffer{
		int96Page: int96Page{
			typ:         typ,
			values:      make([]deprecated.Int96, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *int96ColumnBuffer) Clone() ColumnBuffer {
	return &int96ColumnBuffer{
		int96Page: int96Page{
			typ:         col.typ,
			values:      append([]deprecated.Int96{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *int96ColumnBuffer) ColumnIndex() ColumnIndex { return int96ColumnIndex{&col.int96Page} }

func (col *int96ColumnBuffer) OffsetIndex() OffsetIndex { return int96OffsetIndex{&col.int96Page} }

func (col *int96ColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *int96ColumnBuffer) Dictionary() Dictionary { return nil }

func (col *int96ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *int96ColumnBuffer) Page() Page { return &col.int96Page }

func (col *int96ColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *int96ColumnBuffer) Cap() int { return cap(col.values) }

func (col *int96ColumnBuffer) Len() int { return len(col.values) }

func (col *int96ColumnBuffer) Less(i, j int) bool { return col.values[i].Less(col.values[j]) }

func (col *int96ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

// Write appends raw INT96 values; the input length must be a multiple of 12
// bytes.
func (col *int96ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 12) != 0 {
		return 0,
			fmt.Errorf("cannot write INT96 values from input of size %d", len(b))
	}
	col.values = append(col.values, deprecated.BytesToInt96(b)...)
	return len(b), nil
}

func (col *int96ColumnBuffer) WriteInt96s(values []deprecated.Int96) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *int96ColumnBuffer) WriteValues(values []Value) (int, error) {
	for _, v := range values {
		col.values = append(col.values, v.Int96())
	}
	return len(values), nil
}

// writeValues copies each INT96 element-by-element; there is no gather fast
// path for this (deprecated) 12-byte type.
func (col *int96ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	for i := 0; i < rows.Len(); i++ {
		p := rows.Index(i)
		col.values = append(col.values, *(*deprecated.Int96)(p))
	}
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *int96ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// floatColumnBuffer holds FLOAT values in a floatPage.
type floatColumnBuffer struct{ floatPage }

func newFloatColumnBuffer(typ Type, columnIndex int16, numValues int32) *floatColumnBuffer {
	return &floatColumnBuffer{
		floatPage: floatPage{
			typ:         typ,
			values:      make([]float32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *floatColumnBuffer) Clone() ColumnBuffer {
	return &floatColumnBuffer{
		floatPage: floatPage{
			typ:         col.typ,
			values:      append([]float32{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *floatColumnBuffer) ColumnIndex() ColumnIndex { return floatColumnIndex{&col.floatPage} }

func (col *floatColumnBuffer) OffsetIndex() OffsetIndex { return floatOffsetIndex{&col.floatPage} }

func (col *floatColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *floatColumnBuffer) Dictionary() Dictionary { return nil }

func (col *floatColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *floatColumnBuffer) Page() Page {
	return &col.floatPage
}

func (col *floatColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *floatColumnBuffer) Cap() int { return cap(col.values) }

func (col *floatColumnBuffer) Len() int { return len(col.values) }

func (col *floatColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

func (col *floatColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

// Write appends raw little-endian FLOAT values; the input length must be a
// multiple of 4 bytes.
func (col *floatColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 4) != 0 {
		return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToFloat32(b)...)
	return len(b), nil
}

func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

// writeValues grows the backing slice (at least doubling) then gathers the
// sparse values directly into the newly extended region.
func (col *floatColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]float32, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherFloat32(col.values[n:], rows.Float32Array())
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *floatColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// doubleColumnBuffer holds DOUBLE values in a doublePage.
type doubleColumnBuffer struct{ doublePage }

func newDoubleColumnBuffer(typ Type, columnIndex int16, numValues int32) *doubleColumnBuffer {
	return &doubleColumnBuffer{
		doublePage: doublePage{
			typ:         typ,
			values:      make([]float64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *doubleColumnBuffer) Clone() ColumnBuffer {
	return &doubleColumnBuffer{
		doublePage: doublePage{
			typ:         col.typ,
			values:      append([]float64{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *doubleColumnBuffer) ColumnIndex() ColumnIndex { return doubleColumnIndex{&col.doublePage} }

func (col *doubleColumnBuffer) OffsetIndex() OffsetIndex { return doubleOffsetIndex{&col.doublePage} }

func (col *doubleColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *doubleColumnBuffer) Dictionary() Dictionary { return nil }

func (col *doubleColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *doubleColumnBuffer) Page() Page { return &col.doublePage }

func (col *doubleColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *doubleColumnBuffer) Cap() int { return cap(col.values) }

func (col *doubleColumnBuffer) Len() int { return len(col.values) }

func (col *doubleColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

func (col *doubleColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

// Write appends raw little-endian DOUBLE values; the input length must be a
// multiple of 8 bytes.
func (col *doubleColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 8) != 0 {
		return 0,
			fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToFloat64(b)...)
	return len(b), nil
}

func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

// writeValues grows the backing slice (at least doubling) then gathers the
// sparse values directly into the newly extended region.
func (col *doubleColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]float64, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherFloat64(col.values[n:], rows.Float64Array())
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *doubleColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// byteArrayColumnBuffer holds BYTE_ARRAY values. Value bytes live in the
// page's values slice; per-value offsets and lengths are tracked separately so
// Swap can reorder values without moving their bytes.
type byteArrayColumnBuffer struct {
	byteArrayPage
	// lengths holds the byte length of each value, parallel to offsets.
	lengths []uint32
	// scratch is reused by Page to rewrite values in offset order.
	scratch []byte
}

func newByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *byteArrayColumnBuffer {
	return &byteArrayColumnBuffer{
		byteArrayPage: byteArrayPage{
			typ:         typ,
			values:      make([]byte, 0, typ.EstimateSize(int(numValues))),
			offsets:     make([]uint32, 0, numValues+1),
			columnIndex: ^columnIndex,
		},
		lengths: make([]uint32, 0, numValues),
	}
}

// Clone returns a deep copy of the column buffer.
func (col *byteArrayColumnBuffer) Clone() ColumnBuffer {
	return &byteArrayColumnBuffer{
		byteArrayPage: byteArrayPage{
			typ:         col.typ,
			values:      col.cloneValues(),
			offsets:     col.cloneOffsets(),
			columnIndex: col.columnIndex,
		},
		lengths: col.cloneLengths(),
	}
}

func (col *byteArrayColumnBuffer) cloneLengths() []uint32 {
	lengths := make([]uint32, len(col.lengths))
	copy(lengths, col.lengths)
	return lengths
}

func (col *byteArrayColumnBuffer) ColumnIndex() ColumnIndex {
	return byteArrayColumnIndex{&col.byteArrayPage}
}

func (col *byteArrayColumnBuffer) OffsetIndex() OffsetIndex {
	return byteArrayOffsetIndex{&col.byteArrayPage}
}

func (col *byteArrayColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *byteArrayColumnBuffer) Dictionary() Dictionary { return nil }

func (col *byteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page materializes the buffer as a page; if Swap left the offsets unordered,
// the value bytes are rewritten into offset order first so the page layout is
// sequential.
func (col *byteArrayColumnBuffer) Page() Page {
	if len(col.lengths) > 0 && orderOfUint32(col.offsets) < 1 { // unordered?
		if cap(col.scratch) < len(col.values) {
			col.scratch = make([]byte, 0, cap(col.values))
		} else {
			col.scratch = col.scratch[:0]
		}

		for i := range col.lengths {
			n := len(col.scratch)
			col.scratch = append(col.scratch, col.index(i)...)
			col.offsets[i] = uint32(n)
		}

		col.values, col.scratch = col.scratch, col.values
	}
	// The offsets have the total length as the last item. Since we are about to
	// expose the column buffer's internal state as a Page value we ensure that
	// the last offset is the total length of all values.
	col.offsets = append(col.offsets[:len(col.lengths)], uint32(len(col.values)))
	return &col.byteArrayPage
}

func (col *byteArrayColumnBuffer) Reset() {
	col.values = col.values[:0]
	col.offsets = col.offsets[:0]
	col.lengths = col.lengths[:0]
}

func (col *byteArrayColumnBuffer) NumRows() int64 { return int64(col.Len()) }

func (col *byteArrayColumnBuffer) NumValues() int64 { return int64(col.Len()) }

func (col *byteArrayColumnBuffer) Cap() int { return cap(col.lengths) }

func (col *byteArrayColumnBuffer) Len() int { return len(col.lengths) }

func (col *byteArrayColumnBuffer) Less(i, j int) bool {
	return bytes.Compare(col.index(i), col.index(j)) < 0
}

// Swap exchanges only the offset/length pairs; the value bytes stay in place
// (Page repairs the layout before exposing it).
func (col *byteArrayColumnBuffer) Swap(i, j int) {
	col.offsets[i], col.offsets[j] = col.offsets[j], col.offsets[i]
	col.lengths[i], col.lengths[j] = col.lengths[j], col.lengths[i]
}

// Write appends PLAIN-encoded (length-prefixed) byte arrays, returning the
// number of input bytes consumed.
func (col *byteArrayColumnBuffer) Write(b []byte) (int, error) {
	_, n, err := col.writeByteArrays(b)
	return n, err
}

// WriteByteArrays appends PLAIN-encoded byte arrays, returning the number of
// values written.
func (col *byteArrayColumnBuffer) WriteByteArrays(values []byte) (int, error) {
	n, _, err := col.writeByteArrays(values)
	return n, err
}

// writeByteArrays appends PLAIN-encoded byte arrays and reports both the
// number of values and the number of input bytes consumed.
func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes int, err error) {
	baseCount := len(col.lengths)
	baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths))

	err = plain.RangeByteArray(values, func(value []byte) error {
		col.append(unsafecast.BytesToString(value))
		return nil
	})

	count = len(col.lengths) - baseCount
	bytes = (len(col.values) - baseBytes) + (plain.ByteArrayLengthSize * count)
	return count, bytes, err
}

func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{})
	return len(values), nil
}

// writeValues appends each value's bytes, reading them through a string header
// stored in the sparse array.
func (col *byteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	for i := 0; i < rows.Len(); i++ {
		p := rows.Index(i)
		col.append(*(*string)(p))
	}
}

// ReadValuesAt reads values starting at the given row offset; returns io.EOF
// when the end of the column is reached before filling values.
func (col *byteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64)
(n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.lengths)))
	case i >= len(col.lengths):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.lengths) {
			values[n] = col.makeValueBytes(col.index(i))
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// append records the value's offset and length, then copies its bytes.
func (col *byteArrayColumnBuffer) append(value string) {
	col.offsets = append(col.offsets, uint32(len(col.values)))
	col.lengths = append(col.lengths, uint32(len(value)))
	col.values = append(col.values, value...)
}

// index returns the i'th value as a capacity-bounded sub-slice of the value
// bytes.
func (col *byteArrayColumnBuffer) index(i int) []byte {
	offset := col.offsets[i]
	length := col.lengths[i]
	end := offset + length
	return col.values[offset:end:end]
}

// fixedLenByteArrayColumnBuffer holds FIXED_LEN_BYTE_ARRAY values packed
// contiguously in the page's data slice.
type fixedLenByteArrayColumnBuffer struct {
	fixedLenByteArrayPage
	// tmp is a size-length scratch buffer used by Swap.
	tmp []byte
}

func newFixedLenByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *fixedLenByteArrayColumnBuffer {
	size := typ.Length()
	return &fixedLenByteArrayColumnBuffer{
		fixedLenByteArrayPage: fixedLenByteArrayPage{
			typ:         typ,
			size:        size,
			data:        make([]byte, 0, typ.EstimateSize(int(numValues))),
			columnIndex: ^columnIndex,
		},
		tmp: make([]byte, size),
	}
}

// Clone returns a deep copy of the column buffer.
func (col *fixedLenByteArrayColumnBuffer) Clone() ColumnBuffer {
	return &fixedLenByteArrayColumnBuffer{
		fixedLenByteArrayPage: fixedLenByteArrayPage{
			typ:         col.typ,
			size:        col.size,
			data:        append([]byte{}, col.data...),
			columnIndex: col.columnIndex,
		},
		tmp: make([]byte, col.size),
	}
}

func (col *fixedLenByteArrayColumnBuffer) ColumnIndex() ColumnIndex {
	return fixedLenByteArrayColumnIndex{&col.fixedLenByteArrayPage}
}

func (col *fixedLenByteArrayColumnBuffer) OffsetIndex() OffsetIndex {
	return fixedLenByteArrayOffsetIndex{&col.fixedLenByteArrayPage}
}

func (col *fixedLenByteArrayColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *fixedLenByteArrayColumnBuffer) Dictionary() Dictionary { return nil }

func (col *fixedLenByteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *fixedLenByteArrayColumnBuffer) Page() Page { return &col.fixedLenByteArrayPage }

func (col *fixedLenByteArrayColumnBuffer) Reset() { col.data = col.data[:0] }

func (col *fixedLenByteArrayColumnBuffer) Cap() int { return cap(col.data) / col.size }

func (col *fixedLenByteArrayColumnBuffer) Len() int { return len(col.data) / col.size }

func (col *fixedLenByteArrayColumnBuffer) Less(i, j int) bool {
	return bytes.Compare(col.index(i), col.index(j)) < 0
}

// Swap exchanges the two fixed-size cells through the tmp scratch buffer.
func (col *fixedLenByteArrayColumnBuffer) Swap(i, j int) {
	t, u, v := col.tmp[:col.size], col.index(i), col.index(j)
	copy(t, u)
	copy(u, v)
	copy(v, t)
}

// index returns the i'th fixed-size cell as a capacity-bounded sub-slice.
func (col *fixedLenByteArrayColumnBuffer) index(i int) []byte {
	j := (i + 0) * col.size
	k := (i + 1) * col.size
	return col.data[j:k:k]
}

// Write appends raw fixed-size values, returning the number of bytes written.
func (col *fixedLenByteArrayColumnBuffer) Write(b []byte) (int, error) {
	n, err := col.WriteFixedLenByteArrays(b)
	return n * col.size, err
}

// WriteFixedLenByteArrays appends raw fixed-size values; the input length must
// be a multiple of the column's value size.
func (col *fixedLenByteArrayColumnBuffer) WriteFixedLenByteArrays(values []byte) (int, error) {
	d, m := len(values)/col.size, len(values)%col.size
	if m != 0 {
		return 0, fmt.Errorf("cannot write FIXED_LEN_BYTE_ARRAY values of size %d from input of size %d", col.size, len(values))
	}
	col.data = append(col.data, values...)
	return d, nil
}

func (col *fixedLenByteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
	for _, v := range values {
		col.data = append(col.data, v.byteArray()...)
	}
	return len(values), nil
}

// writeValues grows the data slice (at least doubling) then copies each
// fixed-size value into the newly extended region.
func (col *fixedLenByteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	n := col.size * rows.Len()
	i := len(col.data)
	j := len(col.data) + n

	if cap(col.data) < j {
		col.data = append(make([]byte, 0, max(i+n, 2*cap(col.data))), col.data...)
	}

	col.data = col.data[:j]
	newData := col.data[i:]

	for i := 0; i < rows.Len(); i++ {
		p := rows.Index(i)
		copy(newData[i*col.size:], unsafe.Slice((*byte)(p), col.size))
	}
}

// ReadValuesAt reads values starting at the given row offset (scaled to a byte
// offset); returns io.EOF when the end of the column is reached before filling
// values.
func (col *fixedLenByteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset) * col.size
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.data)/col.size))
	case i >= len(col.data):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.data) {
			values[n] = col.makeValueBytes(col.data[i : i+col.size])
			n++
			i += col.size
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// uint32ColumnBuffer holds INT32 values compared as unsigned in a uint32Page.
type uint32ColumnBuffer struct{ uint32Page }

func newUint32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint32ColumnBuffer {
	return &uint32ColumnBuffer{
		uint32Page: uint32Page{
			typ:         typ,
			values:      make([]uint32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy of the column buffer.
func (col *uint32ColumnBuffer) Clone() ColumnBuffer {
	return &uint32ColumnBuffer{
		uint32Page: uint32Page{
			typ:         col.typ,
			values:      append([]uint32{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *uint32ColumnBuffer) ColumnIndex() ColumnIndex { return uint32ColumnIndex{&col.uint32Page} }

func (col *uint32ColumnBuffer) OffsetIndex() OffsetIndex { return uint32OffsetIndex{&col.uint32Page} }

func (col *uint32ColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *uint32ColumnBuffer) Dictionary() Dictionary { return nil }

func (col *uint32ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *uint32ColumnBuffer) Page() Page { return &col.uint32Page }

func (col *uint32ColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *uint32ColumnBuffer) Cap() int { return cap(col.values) }

func (col *uint32ColumnBuffer) Len() int { return len(col.values) }

func (col *uint32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

func (col *uint32ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
// Write implements io.Writer for raw little-endian 32-bit values; the input
// must be a multiple of 4 bytes and the byte count is returned.
// NOTE(review): the error says "INT32" — presumably because the parquet
// physical type backing uint32 columns is INT32; confirm.
func (col *uint32ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 4) != 0 {
		return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToUint32(b)...)
	return len(b), nil
}

func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	// Values store scalars in the u64 field; the gather below reads its low bits.
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

func (col *uint32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	// Grow by at least doubling to amortize reallocations.
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]uint32, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherUint32(col.values[n:], rows.Uint32Array())
}

// ReadValuesAt copies values starting at row offset, returning io.EOF when
// the buffer runs out before the output slice is full.
func (col *uint32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// uint64ColumnBuffer buffers 64-bit values (physical type INT64).
type uint64ColumnBuffer struct{ uint64Page }

func newUint64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint64ColumnBuffer {
	return &uint64ColumnBuffer{
		uint64Page: uint64Page{
			typ:         typ,
			values:      make([]uint64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

func (col *uint64ColumnBuffer) Clone() ColumnBuffer {
	return &uint64ColumnBuffer{
		uint64Page: uint64Page{
			typ:         col.typ,
			values:      append([]uint64{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *uint64ColumnBuffer) ColumnIndex() ColumnIndex { return uint64ColumnIndex{&col.uint64Page} }

func (col *uint64ColumnBuffer) OffsetIndex() OffsetIndex { return uint64OffsetIndex{&col.uint64Page} }

func (col *uint64ColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *uint64ColumnBuffer) Dictionary() Dictionary { return nil }

func (col *uint64ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *uint64ColumnBuffer) Page() Page { return &col.uint64Page }

func (col *uint64ColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *uint64ColumnBuffer) Cap() int { return cap(col.values) }

func (col *uint64ColumnBuffer) Len() int { return len(col.values) }

func (col *uint64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

func (col *uint64ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

// Write implements io.Writer for raw 64-bit values (multiple of 8 bytes).
func (col *uint64ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 8) != 0 {
		return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToUint64(b)...)
	return len(b), nil
}

func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}

func (col *uint64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]uint64, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherUint64(col.values[n:], rows.Uint64Array())
}

func (col *uint64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// be128ColumnBuffer buffers 128-bit big-endian values (e.g. 16-byte
// FIXED_LEN_BYTE_ARRAY such as UUIDs — presumably; confirm against callers).
type be128ColumnBuffer struct{ be128Page }

func newBE128ColumnBuffer(typ Type, columnIndex int16, numValues int32) *be128ColumnBuffer {
	return &be128ColumnBuffer{
		be128Page: be128Page{
			typ:         typ,
			values:      make([][16]byte, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

func (col *be128ColumnBuffer) Clone() ColumnBuffer {
	return &be128ColumnBuffer{
		be128Page: be128Page{
			typ:         col.typ,
			values:      append([][16]byte{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}

func (col *be128ColumnBuffer) ColumnIndex() ColumnIndex { return be128ColumnIndex{&col.be128Page} }

func (col *be128ColumnBuffer) OffsetIndex() OffsetIndex { return be128OffsetIndex{&col.be128Page} }

func (col *be128ColumnBuffer) BloomFilter() BloomFilter { return nil }

func (col *be128ColumnBuffer) Dictionary() Dictionary { return nil }

func (col *be128ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

func (col *be128ColumnBuffer) Page() Page { return &col.be128Page }

func (col *be128ColumnBuffer) Reset() { col.values = col.values[:0] }

func (col *be128ColumnBuffer) Cap() int { return cap(col.values) }

func (col *be128ColumnBuffer) Len() int { return len(col.values) }

func (col *be128ColumnBuffer) Less(i, j int) bool {
	return lessBE128(&col.values[i], &col.values[j])
}

func (col *be128ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}

func (col *be128ColumnBuffer) WriteValues(values []Value) (int, error) {
	if n := len(col.values) + len(values); n > cap(col.values) {
		col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+len(values)]
	newValues := col.values[n:]
	for i, v := range values {
		copy(newValues[i][:], v.byteArray())
	}
	return len(values), nil
}

func (col *be128ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherUint128(col.values[n:], rows.Uint128Array())
}

func (col *be128ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(&col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}

// Compile-time interface conformance checks.
var (
	_ sort.Interface = (ColumnBuffer)(nil)
	_ io.Writer      = (*byteArrayColumnBuffer)(nil)
	_ io.Writer      = (*fixedLenByteArrayColumnBuffer)(nil)
)

================================================
FILE: column_buffer_amd64.go
================================================
//go:build !purego

package parquet

import (
	"github.com/segmentio/parquet-go/internal/bytealg"
	"github.com/segmentio/parquet-go/internal/unsafecast"
	"github.com/segmentio/parquet-go/sparse"
	"golang.org/x/sys/cpu"
)

// broadcastValueInt32 fills dst with int32 values whose four bytes are all
// equal to src (the byte-level broadcast yields 0xSSSSSSSS per element).
func broadcastValueInt32(dst []int32, src int8) {
	bytealg.Broadcast(unsafecast.Int32ToBytes(dst), byte(src))
}

// Implemented in column_buffer_amd64.s.
//go:noescape
func broadcastRangeInt32AVX2(dst []int32, base int32)

// broadcastRangeInt32 writes base, base+1, base+2, ... into dst, using the
// AVX2 kernel when available and large enough to be worthwhile.
func broadcastRangeInt32(dst []int32, base int32) {
	if len(dst) >= 8 && cpu.X86.HasAVX2 {
		broadcastRangeInt32AVX2(dst, base)
	} else {
		for i := range dst {
			dst[i] = base + int32(i)
		}
	}
}

// Implemented in column_buffer_amd64.s; see column_buffer_purego.go for the
// reference semantics (nil pointers produce a zero [16]byte).
//go:noescape
func writePointersBE128(values [][16]byte, rows sparse.Array)
================================================
FILE: column_buffer_amd64.s
================================================
//go:build !purego

#include "textflag.h"

// func broadcastRangeInt32AVX2(dst []int32, base int32)
//
// Writes base, base+1, ... into dst. The vector loop handles 8 elements per
// iteration; the scalar loop handles the remainder.
TEXT ·broadcastRangeInt32AVX2(SB), NOSPLIT, $0-28
	MOVQ dst_base+0(FP), AX
	MOVQ dst_len+8(FP), BX
	MOVL base+24(FP), CX
	XORQ SI, SI
	CMPQ BX, $8
	JB test1x4
	VMOVDQU ·range0n8(SB), Y0       // [0,1,2,3,4,5,6,7]
	VPBROADCASTD ·range0n8+32(SB), Y1 // [8,8,8,8,8,8,8,8]
	VPBROADCASTD base+24(FP), Y2    // [base...]
	VPADDD Y2, Y0, Y0               // [base,base+1,...]
	MOVQ BX, DI
	SHRQ $3, DI
	SHLQ $3, DI                     // DI = len rounded down to a multiple of 8
	JMP test8x4
loop8x4:
	VMOVDQU Y0, (AX)(SI*4)
	VPADDD Y1, Y0, Y0
	ADDQ $8, SI
test8x4:
	CMPQ SI, DI
	JNE loop8x4
	VZEROUPPER
	JMP test1x4
loop1x4:
	// NOTE(review): this scalar tail computes DX = base * (SI+1), which only
	// equals base + i (the Go fallback's semantics) when base == 1 — TODO
	// confirm intended behavior and callers' base values.
	INCQ SI
	MOVL CX, DX
	IMULL SI, DX
	MOVL DX, -4(AX)(SI*4)
test1x4:
	CMPQ SI, BX
	JNE loop1x4
	RET

// func writePointersBE128(values [][16]byte, rows sparse.Array)
//
// Dereferences each *[16]byte element of the sparse array into values;
// nil pointers yield a zero [16]byte (matches the purego implementation).
TEXT ·writePointersBE128(SB), NOSPLIT, $0-48
	MOVQ values_base+0(FP), AX
	MOVQ rows_array_ptr+24(FP), BX
	MOVQ rows_array_len+32(FP), CX
	MOVQ rows_array_off+40(FP), DX
	XORQ SI, SI
	JMP test
loop:
	PXOR X0, X0
	MOVQ (BX), DI // *[16]byte
	CMPQ DI, $0
	JE next
	MOVOU (DI), X0
next:
	MOVOU X0, (AX)
	ADDQ $16, AX
	ADDQ DX, BX // advance by the sparse array stride
	INCQ SI
test:
	CMPQ SI, CX
	JNE loop
	RET

================================================
FILE: column_buffer_go18.go
================================================
//go:build go1.18

package parquet

import (
	"encoding/json"
	"math/bits"
	"reflect"
	"time"
	"unsafe"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/internal/unsafecast"
	"github.com/segmentio/parquet-go/sparse"
)

// writeRowsFunc is the type of functions that apply rows to a set of column
// buffers.
//
// - columns is the array of column buffer where the rows are written.
//
// - rows is the array of Go values to write to the column buffers.
//
// - levels is used to track the column index, repetition and definition levels
// of values when writing optional or repeated columns.
type writeRowsFunc func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error // writeRowsFuncOf generates a writeRowsFunc function for the given Go type and // parquet schema. The column path indicates the column that the function is // being generated for in the parquet schema. func writeRowsFuncOf(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc { if leaf, exists := schema.Lookup(path...); exists && leaf.Node.Type().LogicalType() != nil && leaf.Node.Type().LogicalType().Json != nil { return writeRowsFuncOfJSON(t, schema, path) } switch t { case reflect.TypeOf(deprecated.Int96{}): return writeRowsFuncOfRequired(t, schema, path) case reflect.TypeOf(time.Time{}): return writeRowsFuncOfTime(t, schema, path) } switch t.Kind() { case reflect.Bool, reflect.Int, reflect.Uint, reflect.Int32, reflect.Uint32, reflect.Int64, reflect.Uint64, reflect.Float32, reflect.Float64, reflect.String: return writeRowsFuncOfRequired(t, schema, path) case reflect.Slice: if t.Elem().Kind() == reflect.Uint8 { return writeRowsFuncOfRequired(t, schema, path) } else { return writeRowsFuncOfSlice(t, schema, path) } case reflect.Array: if t.Elem().Kind() == reflect.Uint8 { return writeRowsFuncOfRequired(t, schema, path) } case reflect.Pointer: return writeRowsFuncOfPointer(t, schema, path) case reflect.Struct: return writeRowsFuncOfStruct(t, schema, path) case reflect.Map: return writeRowsFuncOfMap(t, schema, path) } panic("cannot convert Go values of type " + typeNameOf(t) + " to parquet value") } func writeRowsFuncOfRequired(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc { column := schema.mapping.lookup(path) columnIndex := column.columnIndex return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error { columns[columnIndex].writeValues(rows, levels) return nil } } func writeRowsFuncOfOptional(t reflect.Type, schema *Schema, path columnPath, writeRows writeRowsFunc) writeRowsFunc { nullIndex := nullIndexFuncOf(t) return 
func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error { if rows.Len() == 0 { return writeRows(columns, rows, levels) } nulls := acquireBitmap(rows.Len()) defer releaseBitmap(nulls) nullIndex(nulls.bits, rows) nullLevels := levels levels.definitionLevel++ // In this function, we are dealing with optional values which are // neither pointers nor slices; for example, a int32 field marked // "optional" in its parent struct. // // We need to find zero values, which should be represented as nulls // in the parquet column. In order to minimize the calls to writeRows // and maximize throughput, we use the nullIndex and nonNullIndex // functions, which are type-specific implementations of the algorithm. // // Sections of the input that are contiguous nulls or non-nulls can be // sent to a single call to writeRows to be written to the underlying // buffer since they share the same definition level. // // This optimization is defeated by inputs alternating null and non-null // sequences of single values, we do not expect this condition to be a // common case. 
for i := 0; i < rows.Len(); { j := 0 x := i / 64 y := i % 64 if y != 0 { if b := nulls.bits[x] >> uint(y); b == 0 { x++ y = 0 } else { y += bits.TrailingZeros64(b) goto writeNulls } } for x < len(nulls.bits) && nulls.bits[x] == 0 { x++ } if x < len(nulls.bits) { y = bits.TrailingZeros64(nulls.bits[x]) % 64 } writeNulls: if j = x*64 + y; j > rows.Len() { j = rows.Len() } if i < j { if err := writeRows(columns, rows.Slice(i, j), nullLevels); err != nil { return err } i = j } if y != 0 { if b := nulls.bits[x] >> uint(y); b == (1< rows.Len() { j = rows.Len() } if i < j { if err := writeRows(columns, rows.Slice(i, j), levels); err != nil { return err } i = j } } return nil } } func writeRowsFuncOfPointer(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc { elemType := t.Elem() elemSize := uintptr(elemType.Size()) writeRows := writeRowsFuncOf(elemType, schema, path) if len(path) == 0 { // This code path is taken when generating a writeRowsFunc for a pointer // type. In this case, we do not need to increase the definition level // since we are not deailng with an optional field but a pointer to the // row type. 
// NOTE(review): this span begins inside writeRowsFuncOfPointer, in the
// len(path) == 0 branch opened above this chunk.
		return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
			if rows.Len() == 0 {
				return writeRows(columns, rows, levels)
			}
			for i := 0; i < rows.Len(); i++ {
				p := *(*unsafe.Pointer)(rows.Index(i))
				a := sparse.Array{}
				if p != nil {
					// Dereference the row pointer into a one-element array.
					a = makeArray(p, 1, elemSize)
				}
				if err := writeRows(columns, a, levels); err != nil {
					return err
				}
			}
			return nil
		}
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeRows(columns, rows, levels)
		}
		for i := 0; i < rows.Len(); i++ {
			p := *(*unsafe.Pointer)(rows.Index(i))
			a := sparse.Array{}
			elemLevels := levels
			if p != nil {
				a = makeArray(p, 1, elemSize)
				// Non-nil pointers represent present optional values.
				elemLevels.definitionLevel++
			}
			if err := writeRows(columns, a, elemLevels); err != nil {
				return err
			}
		}
		return nil
	}
}

// writeRowsFuncOfSlice generates a writer for repeated (LIST-like) columns
// backed by Go slices.
func writeRowsFuncOfSlice(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	elemType := t.Elem()
	elemSize := uintptr(elemType.Size())
	writeRows := writeRowsFuncOf(elemType, schema, path)

	// When the element is a pointer type, the writeRows function will be an
	// instance returned by writeRowsFuncOfPointer, which handles incrementing
	// the definition level if the pointer value is not nil.
	definitionLevelIncrement := byte(0)
	if elemType.Kind() != reflect.Ptr {
		definitionLevelIncrement = 1
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeRows(columns, rows, levels)
		}
		levels.repetitionDepth++
		for i := 0; i < rows.Len(); i++ {
			p := (*sliceHeader)(rows.Index(i))
			a := makeArray(p.base, p.len, elemSize)
			b := sparse.Array{}
			elemLevels := levels
			if a.Len() > 0 {
				// The first element of a row keeps the caller's repetition
				// level; only its definition level is incremented.
				b = a.Slice(0, 1)
				elemLevels.definitionLevel += definitionLevelIncrement
			}
			if err := writeRows(columns, b, elemLevels); err != nil {
				return err
			}
			if a.Len() > 1 {
				// Subsequent elements repeat at the current depth.
				elemLevels.repetitionLevel = elemLevels.repetitionDepth
				if err := writeRows(columns, a.Slice(1, a.Len()), elemLevels); err != nil {
					return err
				}
			}
		}
		return nil
	}
}

// writeRowsFuncOfStruct fans out each struct field to its own writer,
// honoring the "list" and "optional" struct tag options.
func writeRowsFuncOfStruct(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	type column struct {
		offset    uintptr
		writeRows writeRowsFunc
	}

	fields := structFieldsOf(t)
	columns := make([]column, len(fields))

	for i, f := range fields {
		optional := false
		columnPath := path.append(f.Name)
		forEachStructTagOption(f, func(_ reflect.Type, option, _ string) {
			switch option {
			case "list":
				columnPath = columnPath.append("list", "element")
			case "optional":
				optional = true
			}
		})

		writeRows := writeRowsFuncOf(f.Type, schema, columnPath)
		if optional {
			switch f.Type.Kind() {
			case reflect.Pointer, reflect.Slice:
				// Pointers and slices already encode optionality themselves.
			default:
				writeRows = writeRowsFuncOfOptional(f.Type, schema, columnPath, writeRows)
			}
		}

		columns[i] = column{
			offset:    f.Offset,
			writeRows: writeRows,
		}
	}

	return func(buffers []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			for _, column := range columns {
				if err := column.writeRows(buffers, rows, levels); err != nil {
					return err
				}
			}
		} else {
			for _, column := range columns {
				// Offset the sparse view to the field within each struct.
				if err := column.writeRows(buffers, rows.Offset(column.offset), levels); err != nil {
					return err
				}
			}
		}
		return nil
	}
}

// writeRowsFuncOfMap writes Go maps as parquet MAP columns
// (key_value.key / key_value.value), one key/value pair at a time.
func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	keyPath := path.append("key_value", "key")
	keyType := t.Key()
	keySize := uintptr(keyType.Size())
	writeKeys := writeRowsFuncOf(keyType, schema, keyPath)

	valuePath := path.append("key_value", "value")
	valueType := t.Elem()
	valueSize := uintptr(valueType.Size())
	writeValues := writeRowsFuncOf(valueType, schema, valuePath)

	writeKeyValues := func(columns []ColumnBuffer, keys, values sparse.Array, levels columnLevels) error {
		if err := writeKeys(columns, keys, levels); err != nil {
			return err
		}
		if err := writeValues(columns, values, levels); err != nil {
			return err
		}
		return nil
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeKeyValues(columns, rows, rows, levels)
		}
		levels.repetitionDepth++
		// Reusable scratch values avoid allocating per map entry.
		mapKey := reflect.New(keyType).Elem()
		mapValue := reflect.New(valueType).Elem()
		for i := 0; i < rows.Len(); i++ {
			m := reflect.NewAt(t, rows.Index(i)).Elem()
			if m.Len() == 0 {
				empty := sparse.Array{}
				if err := writeKeyValues(columns, empty, empty, levels); err != nil {
					return err
				}
			} else {
				elemLevels := levels
				elemLevels.definitionLevel++
				for it := m.MapRange(); it.Next(); {
					mapKey.SetIterKey(it)
					mapValue.SetIterValue(it)
					k := makeArray(unsafecast.PointerOfValue(mapKey), 1, keySize)
					v := makeArray(unsafecast.PointerOfValue(mapValue), 1, valueSize)
					if err := writeKeyValues(columns, k, v, elemLevels); err != nil {
						return err
					}
					// Entries after the first repeat at the current depth.
					elemLevels.repetitionLevel = elemLevels.repetitionDepth
				}
			}
		}
		return nil
	}
}

// writeRowsFuncOfJSON writes values to a JSON logical-type column; non-string
// Go values are marshaled with encoding/json.
func writeRowsFuncOfJSON(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	// If this is a string or a byte array write directly.
	switch t.Kind() {
	case reflect.String:
		return writeRowsFuncOfRequired(t, schema, path)
	case reflect.Slice:
		if t.Elem().Kind() == reflect.Uint8 {
			return writeRowsFuncOfRequired(t, schema, path)
		}
	}

	// Otherwise handle with a json.Marshal
	asStrT := reflect.TypeOf(string(""))
	writer := writeRowsFuncOfRequired(asStrT, schema, path)

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writer(columns, rows, levels)
		}
		for i := 0; i < rows.Len(); i++ {
			val := reflect.NewAt(t, rows.Index(i))
			asI := val.Interface()
			b, err := json.Marshal(asI)
			if err != nil {
				return err
			}
			asStr := string(b)
			a := sparse.MakeStringArray([]string{asStr})
			if err := writer(columns, a.UnsafeArray(), levels); err != nil {
				return err
			}
		}
		return nil
	}
}

// writeRowsFuncOfTime converts time.Time rows to int64 timestamps using the
// unit declared by the column's TIMESTAMP logical type (defaults to nanos).
func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	t := reflect.TypeOf(int64(0))
	elemSize := uintptr(t.Size())
	writeRows := writeRowsFuncOf(t, schema, path)

	col, _ := schema.Lookup(path...)
	unit := Nanosecond.TimeUnit()
	lt := col.Node.Type().LogicalType()
	if lt != nil && lt.Timestamp != nil {
		unit = lt.Timestamp.Unit
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeRows(columns, rows, levels)
		}
		times := rows.TimeArray()
		for i := 0; i < times.Len(); i++ {
			t := times.Index(i)
			var val int64
			switch {
			case unit.Millis != nil:
				val = t.UnixMilli()
			case unit.Micros != nil:
				val = t.UnixMicro()
			default:
				val = t.UnixNano()
			}
			a := makeArray(unsafecast.PointerOfValue(reflect.ValueOf(val)), 1, elemSize)
			if err := writeRows(columns, a, levels); err != nil {
				return err
			}
		}
		return nil
	}
}

================================================
FILE: column_buffer_purego.go
================================================
//go:build !amd64 || purego

package parquet

import "github.com/segmentio/parquet-go/sparse"

// broadcastValueInt32 fills dst with int32 values whose four bytes all equal src.
func broadcastValueInt32(dst []int32, src int8) {
	value := 0x01010101 * int32(src)
	for i := range dst {
		dst[i] = value
	}
}

// broadcastRangeInt32 writes base, base+1, base+2, ... into dst.
func broadcastRangeInt32(dst []int32, base int32) {
	for i := range dst {
		dst[i] = base + int32(i)
	}
}

// writePointersBE128 dereferences each *[16]byte element of the sparse array;
// nil pointers produce a zero [16]byte.
func writePointersBE128(values [][16]byte, rows sparse.Array) {
	for i := range values {
		p := *(**[16]byte)(rows.Index(i))
		if p != nil {
			values[i] = *p
		} else {
			values[i] = [16]byte{}
		}
	}
}

================================================
FILE: column_buffer_test.go
================================================
package parquet

import (
	"testing"
)

func TestBroadcastValueInt32(t *testing.T) {
	buf := make([]int32, 123)
	broadcastValueInt32(buf, 0x0A)
	for i, v := range buf {
		if v != 0x0A0A0A0A {
			t.Fatalf("wrong value at index %d: %v", i, v)
		}
	}
}

func TestBroadcastRangeInt32(t *testing.T) {
	buf := make([]int32, 123)
	broadcastRangeInt32(buf, 1)
	for i, v := range buf {
		if v != int32(1+i) {
			t.Fatalf("wrong value at index %d: %v", i, v)
		}
	}
}

func BenchmarkBroadcastValueInt32(b *testing.B) {
	buf := make([]int32, 1000)
	for i := 0; i < b.N; i++ {
		broadcastValueInt32(buf, -1)
	}
b.SetBytes(4 * int64(len(buf))) } func BenchmarkBroadcastRangeInt32(b *testing.B) { buf := make([]int32, 1000) for i := 0; i < b.N; i++ { broadcastRangeInt32(buf, 0) } b.SetBytes(4 * int64(len(buf))) } // https://github.com/segmentio/parquet-go/issues/501 func TestIssue501(t *testing.T) { col := newBooleanColumnBuffer(BooleanType, 0, 2055208) // write all trues and then flush the buffer _, err := col.WriteBooleans([]bool{true, true, true, true, true, true, true, true}) if err != nil { t.Fatal(err) } col.Reset() // write a single false, we are trying to trip a certain line of code in WriteBooleans _, err = col.WriteBooleans([]bool{false}) if err != nil { t.Fatal(err) } // now write 7 booleans at once, this will cause WriteBooleans to attempt its "alignment" logic _, err = col.WriteBooleans([]bool{false, false, false, false, false, false, false}) if err != nil { panic(err) } for i := 0; i < 8; i++ { read := make([]Value, 1) _, err = col.ReadValuesAt(read, int64(i)) if err != nil { t.Fatal(err) } if read[0].Boolean() { t.Fatalf("expected false at index %d", i) } } } ================================================ FILE: column_chunk.go ================================================ package parquet import ( "io" ) // The ColumnChunk interface represents individual columns of a row group. type ColumnChunk interface { // Returns the column type. Type() Type // Returns the index of this column in its parent row group. Column() int // Returns a reader exposing the pages of the column. Pages() Pages // Returns the components of the page index for this column chunk, // containing details about the content and location of pages within the // chunk. // // Note that the returned value may be the same across calls to these // methods, programs must treat those as read-only. // // If the column chunk does not have a page index, the methods return nil. 
ColumnIndex() ColumnIndex OffsetIndex() OffsetIndex BloomFilter() BloomFilter // Returns the number of values in the column chunk. // // This quantity may differ from the number of rows in the parent row group // because repeated columns may hold zero or more values per row. NumValues() int64 } type pageAndValueWriter interface { PageWriter ValueWriter } type readRowsFunc func(*rowGroupRows, []Row, byte) (int, error) func readRowsFuncOf(node Node, columnIndex int, repetitionDepth byte) (int, readRowsFunc) { var read readRowsFunc if node.Repeated() { repetitionDepth++ } if node.Leaf() { columnIndex, read = readRowsFuncOfLeaf(columnIndex, repetitionDepth) } else { columnIndex, read = readRowsFuncOfGroup(node, columnIndex, repetitionDepth) } if node.Repeated() { read = readRowsFuncOfRepeated(read, repetitionDepth) } return columnIndex, read } //go:noinline func readRowsFuncOfRepeated(read readRowsFunc, repetitionDepth byte) readRowsFunc { return func(r *rowGroupRows, rows []Row, repetitionLevel byte) (int, error) { for i := range rows { // Repeated columns have variable number of values, we must process // them one row at a time because we cannot predict how many values // need to be consumed in each iteration. row := rows[i : i+1] // The first pass looks for values marking the beginning of a row by // having a repetition level equal to the current level. n, err := read(r, row, repetitionLevel) if err != nil { // The error here may likely be io.EOF, the read function may // also have successfully read a row, which is indicated by a // non-zero count. In this case, we increment the index to // indicate to the caller than rows up to i+1 have been read. if n > 0 { i++ } return i, err } // The read function may return no errors and also read no rows in // case where it had more values to read but none corresponded to // the current repetition level. 
This is an indication that we will // not be able to read more rows at this stage, we must return to // the caller to let it set the repetition level to its current // depth, which may allow us to read more values when called again. if n == 0 { return i, nil } // When we reach this stage, we have successfully read the first // values of a row of repeated columns. We continue consuming more // repeated values until we get the indication that we consumed // them all (the read function returns zero and no errors). for { n, err := read(r, row, repetitionDepth) if err != nil { return i + 1, err } if n == 0 { break } } } return len(rows), nil } } //go:noinline func readRowsFuncOfGroup(node Node, columnIndex int, repetitionDepth byte) (int, readRowsFunc) { fields := node.Fields() if len(fields) == 0 { return columnIndex, func(*rowGroupRows, []Row, byte) (int, error) { return 0, io.EOF } } if len(fields) == 1 { // Small optimization for a somewhat common case of groups with a single // column (like nested list elements for example); there is no need to // loop over the group of a single element, we can simply skip to calling // the inner read function. return readRowsFuncOf(fields[0], columnIndex, repetitionDepth) } group := make([]readRowsFunc, len(fields)) for i := range group { columnIndex, group[i] = readRowsFuncOf(fields[i], columnIndex, repetitionDepth) } return columnIndex, func(r *rowGroupRows, rows []Row, repetitionLevel byte) (int, error) { // When reading a group, we use the first column as an indicator of how // may rows can be read during this call. n, err := group[0](r, rows, repetitionLevel) if n > 0 { // Read values for all rows that the group is able to consume. // Getting io.EOF from calling the read functions indicate that // we consumed all values of that particular column, but there may // be more to read in other columns, therefore we must always read // all columns and cannot stop on the first error. 
for _, read := range group[1:] { _, err2 := read(r, rows[:n], repetitionLevel) if err2 != nil && err2 != io.EOF { return 0, err2 } } } return n, err } } //go:noinline func readRowsFuncOfLeaf(columnIndex int, repetitionDepth byte) (int, readRowsFunc) { var read readRowsFunc if repetitionDepth == 0 { read = func(r *rowGroupRows, rows []Row, _ byte) (int, error) { // When the repetition depth is zero, we know that there is exactly // one value per row for this column, and therefore we can consume // as many values as there are rows to fill. col := &r.columns[columnIndex] buf := r.buffer(columnIndex) for i := range rows { if col.offset == col.length { n, err := col.values.ReadValues(buf) col.offset = 0 col.length = int32(n) if n == 0 && err != nil { return 0, err } } rows[i] = append(rows[i], buf[col.offset]) col.offset++ } return len(rows), nil } } else { read = func(r *rowGroupRows, rows []Row, repetitionLevel byte) (int, error) { // When the repetition depth is not zero, we know that we will be // called with a single row as input. We attempt to read at most one // value of a single row and return to the caller. col := &r.columns[columnIndex] buf := r.buffer(columnIndex) if col.offset == col.length { n, err := col.values.ReadValues(buf) col.offset = 0 col.length = int32(n) if n == 0 && err != nil { return 0, err } } if buf[col.offset].repetitionLevel != repetitionLevel { return 0, nil } rows[0] = append(rows[0], buf[col.offset]) col.offset++ return 1, nil } } return columnIndex + 1, read } ================================================ FILE: column_index.go ================================================ package parquet import ( "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding/plain" "github.com/segmentio/parquet-go/format" "github.com/segmentio/parquet-go/internal/unsafecast" ) type ColumnIndex interface { // NumPages returns the number of paged in the column index. 
NumPages() int

	// Returns the number of null values in the page at the given index.
	NullCount(int) int64

	// Tells whether the page at the given index contains null values only.
	NullPage(int) bool

	// MinValue and MaxValue return the lower and upper bounds of the page at
	// the given index in the column.
	MinValue(int) Value
	MaxValue(int) Value

	// IsAscending returns true if the column index min/max values are sorted
	// in ascending order (based on the ordering rules of the column's logical
	// type).
	IsAscending() bool

	// IsDescending returns true if the column index min/max values are sorted
	// in descending order (based on the ordering rules of the column's logical
	// type).
	IsDescending() bool
}

// NewColumnIndex constructs a ColumnIndex instance from the given parquet
// format column index. The kind argument configures the type of values
// decoded from the raw min/max byte arrays of the index.
func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex {
	return &formatColumnIndex{
		kind:  kind,
		index: index,
	}
}

// formatColumnIndex adapts a raw format.ColumnIndex to the ColumnIndex
// interface, decoding min/max byte arrays through the configured Kind.
type formatColumnIndex struct {
	kind  Kind
	index *format.ColumnIndex
}

func (f *formatColumnIndex) NumPages() int {
	return len(f.index.MinValues)
}

func (f *formatColumnIndex) NullCount(i int) int64 {
	// NullCounts is optional in the parquet format; default to zero.
	if len(f.index.NullCounts) > 0 {
		return f.index.NullCounts[i]
	}
	return 0
}

func (f *formatColumnIndex) NullPage(i int) bool {
	return len(f.index.NullPages) > 0 && f.index.NullPages[i]
}

func (f *formatColumnIndex) MinValue(i int) Value {
	// Null pages carry no meaningful bounds; return the null Value.
	if f.NullPage(i) {
		return Value{}
	}
	return f.kind.Value(f.index.MinValues[i])
}

func (f *formatColumnIndex) MaxValue(i int) Value {
	if f.NullPage(i) {
		return Value{}
	}
	return f.kind.Value(f.index.MaxValues[i])
}

func (f *formatColumnIndex) IsAscending() bool {
	return f.index.BoundaryOrder == format.Ascending
}

func (f *formatColumnIndex) IsDescending() bool {
	return f.index.BoundaryOrder == format.Descending
}

// fileColumnIndex exposes the column index of a column chunk read from a file.
type fileColumnIndex struct{ chunk *fileColumnChunk }

func (i fileColumnIndex) NumPages() int {
	return len(i.chunk.columnIndex.NullPages)
}

func (i fileColumnIndex) NullCount(j int) int64 {
	// NullCounts is optional in the parquet format; default to zero.
	if len(i.chunk.columnIndex.NullCounts) > 0 {
		return i.chunk.columnIndex.NullCounts[j]
	}
	return 0
}

func (i fileColumnIndex) NullPage(j int) bool {
	return len(i.chunk.columnIndex.NullPages) > 0 && i.chunk.columnIndex.NullPages[j]
}

func (i fileColumnIndex) MinValue(j int) Value {
	if i.NullPage(j) {
		return Value{}
	}
	return i.makeValue(i.chunk.columnIndex.MinValues[j])
}

func (i fileColumnIndex) MaxValue(j int) Value {
	if i.NullPage(j) {
		return Value{}
	}
	return i.makeValue(i.chunk.columnIndex.MaxValues[j])
}

func (i fileColumnIndex) IsAscending() bool {
	return i.chunk.columnIndex.BoundaryOrder == format.Ascending
}

func (i fileColumnIndex) IsDescending() bool {
	return i.chunk.columnIndex.BoundaryOrder == format.Descending
}

// makeValue decodes a raw bound using the column's type kind.
func (i *fileColumnIndex) makeValue(b []byte) Value {
	return i.chunk.column.typ.Kind().Value(b)
}

// emptyColumnIndex is a ColumnIndex with no pages.
type emptyColumnIndex struct{}

func (emptyColumnIndex) NumPages() int       { return 0 }
func (emptyColumnIndex) NullCount(int) int64 { return 0 }
func (emptyColumnIndex) NullPage(int) bool   { return false }
func (emptyColumnIndex) MinValue(int) Value  { return Value{} }
func (emptyColumnIndex) MaxValue(int) Value  { return Value{} }
func (emptyColumnIndex) IsAscending() bool   { return false }
func (emptyColumnIndex) IsDescending() bool  { return false }

// booleanColumnIndex is a single-page column index backed by an in-memory page.
type booleanColumnIndex struct{ page *booleanPage }

func (i booleanColumnIndex) NumPages() int       { return 1 }
func (i booleanColumnIndex) NullCount(int) int64 { return 0 }
func (i booleanColumnIndex) NullPage(int) bool   { return false }
func (i booleanColumnIndex) MinValue(int) Value  { return makeValueBoolean(i.page.min()) }
func (i booleanColumnIndex) MaxValue(int) Value  { return makeValueBoolean(i.page.max()) }
func (i booleanColumnIndex) IsAscending() bool   { return false }
func (i booleanColumnIndex) IsDescending() bool  { return false }

// int32ColumnIndex is a single-page column index backed by an in-memory page.
type int32ColumnIndex struct{ page *int32Page }

func (i int32ColumnIndex) NumPages() int       { return 1 }
func (i int32ColumnIndex) NullCount(int) int64 { return 0 }
func (i int32ColumnIndex) NullPage(int) bool   { return false }
func (i int32ColumnIndex) MinValue(int) Value  { return makeValueInt32(i.page.min()) }
func (i int32ColumnIndex) MaxValue(int) Value  { return makeValueInt32(i.page.max()) }
func (i int32ColumnIndex) IsAscending() bool   { return false }
func (i int32ColumnIndex) IsDescending() bool  { return false }

// int64ColumnIndex is a single-page column index backed by an in-memory page.
type int64ColumnIndex struct{ page *int64Page }

func (i int64ColumnIndex) NumPages() int       { return 1 }
func (i int64ColumnIndex) NullCount(int) int64 { return 0 }
func (i int64ColumnIndex) NullPage(int) bool   { return false }
func (i int64ColumnIndex) MinValue(int) Value  { return makeValueInt64(i.page.min()) }
func (i int64ColumnIndex) MaxValue(int) Value  { return makeValueInt64(i.page.max()) }
func (i int64ColumnIndex) IsAscending() bool   { return false }
func (i int64ColumnIndex) IsDescending() bool  { return false }

// int96ColumnIndex is a single-page column index backed by an in-memory page.
type int96ColumnIndex struct{ page *int96Page }

func (i int96ColumnIndex) NumPages() int       { return 1 }
func (i int96ColumnIndex) NullCount(int) int64 { return 0 }
func (i int96ColumnIndex) NullPage(int) bool   { return false }
func (i int96ColumnIndex) MinValue(int) Value  { return makeValueInt96(i.page.min()) }
func (i int96ColumnIndex) MaxValue(int) Value  { return makeValueInt96(i.page.max()) }
func (i int96ColumnIndex) IsAscending() bool   { return false }
func (i int96ColumnIndex) IsDescending() bool  { return false }

// floatColumnIndex is a single-page column index backed by an in-memory page.
type floatColumnIndex struct{ page *floatPage }

func (i floatColumnIndex) NumPages() int       { return 1 }
func (i floatColumnIndex) NullCount(int) int64 { return 0 }
func (i floatColumnIndex) NullPage(int) bool   { return false }
func (i floatColumnIndex) MinValue(int) Value  { return makeValueFloat(i.page.min()) }
func (i floatColumnIndex) MaxValue(int) Value  { return makeValueFloat(i.page.max()) }
func (i floatColumnIndex) IsAscending() bool   { return false }
func (i floatColumnIndex) IsDescending() bool  { return false }

// doubleColumnIndex is a single-page column index backed by an in-memory page.
type doubleColumnIndex struct{ page *doublePage }

func (i doubleColumnIndex) NumPages() int       { return 1 }
func (i doubleColumnIndex) NullCount(int) int64 { return 0 }
func (i doubleColumnIndex) NullPage(int) bool   { return false }
func (i doubleColumnIndex) MinValue(int) Value  { return makeValueDouble(i.page.min()) }
func (i doubleColumnIndex) MaxValue(int) Value  { return makeValueDouble(i.page.max()) }
func (i doubleColumnIndex) IsAscending() bool   { return false }
func (i doubleColumnIndex) IsDescending() bool  { return false }

// byteArrayColumnIndex is a single-page column index backed by an in-memory page.
type byteArrayColumnIndex struct{ page *byteArrayPage }

func (i byteArrayColumnIndex) NumPages() int       { return 1 }
func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 }
func (i byteArrayColumnIndex) NullPage(int) bool   { return false }
func (i byteArrayColumnIndex) MinValue(int) Value  { return makeValueBytes(ByteArray, i.page.min()) }
func (i byteArrayColumnIndex) MaxValue(int) Value  { return makeValueBytes(ByteArray, i.page.max()) }
func (i byteArrayColumnIndex) IsAscending() bool   { return false }
func (i byteArrayColumnIndex) IsDescending() bool  { return false }

// fixedLenByteArrayColumnIndex is a single-page column index backed by an
// in-memory page.
type fixedLenByteArrayColumnIndex struct{ page *fixedLenByteArrayPage }

func (i fixedLenByteArrayColumnIndex) NumPages() int       { return 1 }
func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 }
func (i fixedLenByteArrayColumnIndex) NullPage(int) bool   { return false }
func (i fixedLenByteArrayColumnIndex) MinValue(int) Value {
	return makeValueBytes(FixedLenByteArray, i.page.min())
}
func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value {
	return makeValueBytes(FixedLenByteArray, i.page.max())
}
func (i fixedLenByteArrayColumnIndex) IsAscending() bool  { return false }
func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return false }

// uint32ColumnIndex is a single-page column index backed by an in-memory page.
type uint32ColumnIndex struct{ page *uint32Page }

func (i uint32ColumnIndex) NumPages() int       { return 1 }
func (i uint32ColumnIndex) NullCount(int) int64 { return 0 }
func (i uint32ColumnIndex) NullPage(int) bool   { return false }
func (i uint32ColumnIndex) MinValue(int) Value  { return makeValueUint32(i.page.min()) }
func (i uint32ColumnIndex) MaxValue(int) Value  { return makeValueUint32(i.page.max()) }
func (i uint32ColumnIndex) IsAscending() bool   { return false }
func (i uint32ColumnIndex) IsDescending() bool  { return false }

// uint64ColumnIndex is a single-page column index backed by an in-memory page.
type uint64ColumnIndex struct{ page *uint64Page }

func (i uint64ColumnIndex) NumPages() int       { return 1 }
func (i uint64ColumnIndex) NullCount(int) int64 { return 0 }
func (i uint64ColumnIndex) NullPage(int) bool   { return false }
func (i uint64ColumnIndex) MinValue(int) Value  { return makeValueUint64(i.page.min()) }
func (i uint64ColumnIndex) MaxValue(int) Value  { return makeValueUint64(i.page.max()) }
func (i uint64ColumnIndex) IsAscending() bool   { return false }
func (i uint64ColumnIndex) IsDescending() bool  { return false }

// be128ColumnIndex is a single-page column index backed by an in-memory page
// of 16 byte big-endian values.
type be128ColumnIndex struct{ page *be128Page }

func (i be128ColumnIndex) NumPages() int       { return 1 }
func (i be128ColumnIndex) NullCount(int) int64 { return 0 }
func (i be128ColumnIndex) NullPage(int) bool   { return false }
func (i be128ColumnIndex) MinValue(int) Value  { return makeValueBytes(FixedLenByteArray, i.page.min()) }
func (i be128ColumnIndex) MaxValue(int) Value  { return makeValueBytes(FixedLenByteArray, i.page.max()) }
func (i be128ColumnIndex) IsAscending() bool   { return false }
func (i be128ColumnIndex) IsDescending() bool  { return false }

// The ColumnIndexer interface is implemented by types that support generating
// parquet column indexes.
//
// The package does not export any types that implement this interface, programs
// must call NewColumnIndexer on a Type instance to construct column indexers.
type ColumnIndexer interface {
	// Resets the column indexer state.
	Reset()

	// Add a page to the column indexer.
	IndexPage(numValues, numNulls int64, min, max Value)

	// Generates a format.ColumnIndex value from the current state of the
	// column indexer.
//
	// The returned value may reference internal buffers, in which case the
	// values remain valid until the next call to IndexPage or Reset on the
	// column indexer.
	ColumnIndex() format.ColumnIndex
}

// baseColumnIndexer carries the null-tracking state shared by all column
// indexer implementations.
type baseColumnIndexer struct {
	nullPages  []bool
	nullCounts []int64
}

// reset clears the recorded pages, retaining the backing arrays for reuse.
func (i *baseColumnIndexer) reset() {
	i.nullPages = i.nullPages[:0]
	i.nullCounts = i.nullCounts[:0]
}

// observe records the null statistics of one page; a page where every value
// is null is flagged as a null page.
func (i *baseColumnIndexer) observe(numValues, numNulls int64) {
	i.nullPages = append(i.nullPages, numValues == numNulls)
	i.nullCounts = append(i.nullCounts, numNulls)
}

// columnIndex assembles a format.ColumnIndex from the accumulated state and
// the per-type min/max encodings produced by the concrete indexer.
func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte, minOrder, maxOrder int) format.ColumnIndex {
	return format.ColumnIndex{
		NullPages:     i.nullPages,
		NullCounts:    i.nullCounts,
		MinValues:     minValues,
		MaxValues:     maxValues,
		BoundaryOrder: boundaryOrderOf(minOrder, maxOrder),
	}
}

// booleanColumnIndexer generates column indexes for BOOLEAN columns.
type booleanColumnIndexer struct {
	baseColumnIndexer
	minValues []bool
	maxValues []bool
}

func newBooleanColumnIndexer() *booleanColumnIndexer {
	return new(booleanColumnIndexer)
}

func (i *booleanColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.boolean())
	i.maxValues = append(i.maxValues, max.boolean())
}

func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.BoolToBytes(i.minValues), 1),
		splitFixedLenByteArrays(unsafecast.BoolToBytes(i.maxValues), 1),
		orderOfBool(i.minValues),
		orderOfBool(i.maxValues),
	)
}

// int32ColumnIndexer generates column indexes for INT32 columns.
type int32ColumnIndexer struct {
	baseColumnIndexer
	minValues []int32
	maxValues []int32
}

func newInt32ColumnIndexer() *int32ColumnIndexer {
	return new(int32ColumnIndexer)
}

func (i *int32ColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.int32())
	i.maxValues = append(i.maxValues, max.int32())
}

func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.minValues), 4),
		splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.maxValues), 4),
		orderOfInt32(i.minValues),
		orderOfInt32(i.maxValues),
	)
}

// int64ColumnIndexer generates column indexes for INT64 columns.
type int64ColumnIndexer struct {
	baseColumnIndexer
	minValues []int64
	maxValues []int64
}

func newInt64ColumnIndexer() *int64ColumnIndexer {
	return new(int64ColumnIndexer)
}

func (i *int64ColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.int64())
	i.maxValues = append(i.maxValues, max.int64())
}

func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.minValues), 8),
		splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.maxValues), 8),
		orderOfInt64(i.minValues),
		orderOfInt64(i.maxValues),
	)
}

// int96ColumnIndexer generates column indexes for the deprecated INT96 columns.
type int96ColumnIndexer struct {
	baseColumnIndexer
	minValues []deprecated.Int96
	maxValues []deprecated.Int96
}

func newInt96ColumnIndexer() *int96ColumnIndexer {
	return new(int96ColumnIndexer)
}

func (i *int96ColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.Int96())
	i.maxValues = append(i.maxValues, max.Int96())
}

func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(deprecated.Int96ToBytes(i.minValues), 12),
		splitFixedLenByteArrays(deprecated.Int96ToBytes(i.maxValues), 12),
		deprecated.OrderOfInt96(i.minValues),
		deprecated.OrderOfInt96(i.maxValues),
	)
}

// floatColumnIndexer generates column indexes for FLOAT columns.
type floatColumnIndexer struct {
	baseColumnIndexer
	minValues []float32
	maxValues []float32
}

func newFloatColumnIndexer() *floatColumnIndexer {
	return new(floatColumnIndexer)
}

func (i *floatColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.float())
	i.maxValues = append(i.maxValues, max.float())
}

func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.minValues), 4),
		splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.maxValues), 4),
		orderOfFloat32(i.minValues),
		orderOfFloat32(i.maxValues),
	)
}

// doubleColumnIndexer generates column indexes for DOUBLE columns.
type doubleColumnIndexer struct {
	baseColumnIndexer
	minValues []float64
	maxValues []float64
}

func newDoubleColumnIndexer() *doubleColumnIndexer {
	return new(doubleColumnIndexer)
}

func (i *doubleColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.double())
	i.maxValues = append(i.maxValues, max.double())
}

func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.minValues), 8),
		splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.maxValues), 8),
		orderOfFloat64(i.minValues),
		orderOfFloat64(i.maxValues),
	)
}

// byteArrayColumnIndexer generates column indexes for BYTE_ARRAY columns,
// optionally truncating the recorded min/max values to sizeLimit bytes.
type byteArrayColumnIndexer struct {
	baseColumnIndexer
	sizeLimit int
	minValues []byte
	maxValues []byte
}

func newByteArrayColumnIndexer(sizeLimit int) *byteArrayColumnIndexer {
	return &byteArrayColumnIndexer{sizeLimit: sizeLimit}
}

func (i *byteArrayColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	// Values are stored length-prefixed (PLAIN encoding) in a single buffer.
	i.minValues = plain.AppendByteArray(i.minValues, min.byteArray())
	i.maxValues = plain.AppendByteArray(i.maxValues, max.byteArray())
}

func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
	minValues := splitByteArrays(i.minValues)
	maxValues := splitByteArrays(i.maxValues)
	// A positive size limit caps the length of each bound; min values are
	// simply cut, max values are cut and incremented to remain upper bounds.
	if sizeLimit := i.sizeLimit; sizeLimit > 0 {
		for i, v := range minValues {
			minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit)
		}
		for i, v := range maxValues {
			maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit)
		}
	}
	return i.columnIndex(
		minValues,
		maxValues,
		orderOfBytes(minValues),
		orderOfBytes(maxValues),
	)
}

// fixedLenByteArrayColumnIndexer generates column indexes for
// FIXED_LEN_BYTE_ARRAY columns of the given size, optionally truncating the
// recorded min/max values to sizeLimit bytes.
type fixedLenByteArrayColumnIndexer struct {
	baseColumnIndexer
	size      int
	sizeLimit int
	minValues []byte
	maxValues []byte
}

func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByteArrayColumnIndexer {
	return &fixedLenByteArrayColumnIndexer{
		size:      size,
		sizeLimit: sizeLimit,
	}
}

func (i *fixedLenByteArrayColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.byteArray()...)
	i.maxValues = append(i.maxValues, max.byteArray()...)
}

func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
	minValues := splitFixedLenByteArrays(i.minValues, i.size)
	maxValues := splitFixedLenByteArrays(i.maxValues, i.size)
	if sizeLimit := i.sizeLimit; sizeLimit > 0 {
		for i, v := range minValues {
			minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit)
		}
		for i, v := range maxValues {
			maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit)
		}
	}
	return i.columnIndex(
		minValues,
		maxValues,
		orderOfBytes(minValues),
		orderOfBytes(maxValues),
	)
}

// uint32ColumnIndexer generates column indexes for unsigned INT32 columns.
type uint32ColumnIndexer struct {
	baseColumnIndexer
	minValues []uint32
	maxValues []uint32
}

func newUint32ColumnIndexer() *uint32ColumnIndexer {
	return new(uint32ColumnIndexer)
}

func (i *uint32ColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.uint32())
	i.maxValues = append(i.maxValues, max.uint32())
}

func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.minValues), 4),
		splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.maxValues), 4),
		orderOfUint32(i.minValues),
		orderOfUint32(i.maxValues),
	)
}

// uint64ColumnIndexer generates column indexes for unsigned INT64 columns.
type uint64ColumnIndexer struct {
	baseColumnIndexer
	minValues []uint64
	maxValues []uint64
}

func newUint64ColumnIndexer() *uint64ColumnIndexer {
	return new(uint64ColumnIndexer)
}

func (i *uint64ColumnIndexer) Reset() {
	i.reset()
	i.minValues = i.minValues[:0]
	i.maxValues = i.maxValues[:0]
}

func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
	i.observe(numValues, numNulls)
	i.minValues = append(i.minValues, min.uint64())
	i.maxValues = append(i.maxValues, max.uint64())
}

func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex {
	return i.columnIndex(
		splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.minValues), 8),
splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.maxValues), 8), orderOfUint64(i.minValues), orderOfUint64(i.maxValues), ) } type be128ColumnIndexer struct { baseColumnIndexer minValues [][16]byte maxValues [][16]byte } func newBE128ColumnIndexer() *be128ColumnIndexer { return new(be128ColumnIndexer) } func (i *be128ColumnIndexer) Reset() { i.reset() i.minValues = i.minValues[:0] i.maxValues = i.maxValues[:0] } func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) { i.observe(numValues, numNulls) if !min.IsNull() { i.minValues = append(i.minValues, *(*[16]byte)(min.byteArray())) } if !max.IsNull() { i.maxValues = append(i.maxValues, *(*[16]byte)(max.byteArray())) } } func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex { minValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.minValues), 16) maxValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.maxValues), 16) return i.columnIndex( minValues, maxValues, orderOfBytes(minValues), orderOfBytes(maxValues), ) } func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte { if len(value) > sizeLimit { value = value[:sizeLimit] } return value } // truncateLargeMaxByteArrayValue truncates the given byte array to the given size limit. // If the given byte array is truncated, it is incremented by 1 in place. func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte { if len(value) > sizeLimit { value = value[:sizeLimit] incrementByteArrayInplace(value) } return value } // incrementByteArray increments the given byte array by 1. 
// Reference: https://github.com/apache/parquet-mr/blob/master/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BinaryTruncator.java#L124 func incrementByteArrayInplace(value []byte) { for i := len(value) - 1; i >= 0; i-- { value[i]++ if value[i] != 0 { // Did not overflow: 0xFF -> 0x00 return } } // Fully overflowed, so restore all to 0xFF for i := range value { value[i] = 0xFF } } func splitByteArrays(data []byte) [][]byte { length := 0 plain.RangeByteArray(data, func([]byte) error { length++ return nil }) buffer := make([]byte, 0, len(data)-(4*length)) values := make([][]byte, 0, length) plain.RangeByteArray(data, func(value []byte) error { offset := len(buffer) buffer = append(buffer, value...) values = append(values, buffer[offset:]) return nil }) return values } func splitFixedLenByteArrays(data []byte, size int) [][]byte { data = copyBytes(data) values := make([][]byte, len(data)/size) for i := range values { j := (i + 0) * size k := (i + 1) * size values[i] = data[j:k:k] } return values } func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder { if minOrder == maxOrder { switch { case minOrder > 0: return format.Ascending case minOrder < 0: return format.Descending } } return format.Unordered } ================================================ FILE: column_index_internal_test.go ================================================ package parquet import ( "bytes" "testing" ) func TestIncrementByteArrayInplace(t *testing.T) { testCases := [][]byte{ {0x00, 0x01, 0x02, 0x03}, {0x00, 0x01, 0x02, 0x04}, {0x00, 0x01, 0x02, 0xFF}, {0x00, 0x01, 0x03, 0x00}, {0x00, 0x01, 0xFF, 0xFF}, {0x00, 0x02, 0x00, 0x00}, {0xFF, 0xFF, 0xFF, 0xFF}, {0xFF, 0xFF, 0xFF, 0xFF}, } for i := 0; i < len(testCases); i += 2 { input := testCases[i] expected := testCases[i+1] actual := copyBytes(input) incrementByteArrayInplace(actual) if !bytes.Equal(actual, expected) { t.Errorf("incrementByteArrayInplace(%v) = %v, want %v", input, actual, expected) } } } 
================================================
FILE: column_index_test.go
================================================
package parquet_test

import (
	"testing"

	"github.com/segmentio/parquet-go"
)

func TestBinaryColumnIndexMinMax(t *testing.T) {
	testCases := [][]interface{}{
		// kind, type, page min, page max, size limit, [value to search, expected result]...
		{parquet.ByteArray, parquet.ByteArrayType, []byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4,
			[]byte{0, 0, 0, 0, 0, 0}, true,
			[]byte{0, 1, 2, 3, 4, 5}, true,
			[]byte{1, 2, 3, 4}, true,
			[]byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit
			[]byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit
			[]byte{1, 2, 3, 5}, true, // false positive due to size limit
			[]byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max
			[]byte{2, 3, 4, 5}, false, // should be no hit since it definitely exceeds page max
		},
		{parquet.FixedLenByteArray, parquet.FixedLenByteArrayType(6), []byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4,
			[]byte{0, 0, 0, 0, 0, 0}, true,
			[]byte{0, 1, 2, 3, 4, 5}, true,
			[]byte{1, 2, 3, 4, 0, 0}, true,
			[]byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit
			[]byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit
			[]byte{1, 2, 3, 4, 0xFF, 0xFF}, true, // false positive due to size limit
			[]byte{1, 2, 3, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max
			[]byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max
			[]byte{2, 3, 4, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max
		},
	}
	for _, testCase := range testCases {
		kind := testCase[0].(parquet.Kind)
		typ := testCase[1].(parquet.Type)
		min := testCase[2].([]byte)
		max := testCase[3].([]byte)
		sizeLimit := testCase[4].(int)

		// Index a single page with the configured bounds and size limit, then
		// probe the resulting column index with each search value.
		indexer := typ.NewColumnIndexer(sizeLimit)
		indexer.IndexPage(100, 0,
			parquet.ValueOf(min),
			parquet.ValueOf(max),
		)
		formatIndex := indexer.ColumnIndex()

		columnIndex :=
parquet.NewColumnIndex(kind, &formatIndex)

		for i := 5; i < len(testCase); i += 2 {
			value := testCase[i].([]byte)
			expected := testCase[i+1].(bool)
			v := parquet.ValueOf(value)
			// Search returns 0 when the value may be present in page 0.
			actual := parquet.Search(columnIndex, v, typ) == 0
			if actual != expected {
				t.Errorf("checkByteArrayMinMax(%v, %v, %v, %v) = %v, want %v", min, max, value, sizeLimit, actual, expected)
			}
		}
	}
}

================================================
FILE: column_mapping.go
================================================
package parquet

// LeafColumn is a struct type representing leaf columns of a parquet schema.
type LeafColumn struct {
	Node               Node
	Path               []string
	ColumnIndex        int
	MaxRepetitionLevel int
	MaxDefinitionLevel int
}

// columnMappingOf builds a tree of columnMapping nodes mirroring the schema's
// group structure, and returns it along with the ordered list of leaf column
// paths.
func columnMappingOf(schema Node) (mapping columnMappingGroup, columns [][]string) {
	mapping = make(columnMappingGroup)
	columns = make([][]string, 0, 16)

	forEachLeafColumnOf(schema, func(leaf leafColumn) {
		// Copy the path since leaf.path may be reused by the iterator.
		path := make(columnPath, len(leaf.path))
		copy(path, leaf.path)
		columns = append(columns, path)

		// Walk/create intermediate groups for every path element but the last.
		group := mapping
		for len(path) > 1 {
			columnName := path[0]
			g, ok := group[columnName].(columnMappingGroup)
			if !ok {
				g = make(columnMappingGroup)
				group[columnName] = g
			}
			group, path = g, path[1:]
		}
		leaf.path = path // use the copy
		group[path[0]] = &columnMappingLeaf{column: leaf}
	})

	return mapping, columns
}

// columnMapping is a node of the lookup tree: either a group or a leaf.
type columnMapping interface {
	lookup(path columnPath) leafColumn
}

type columnMappingGroup map[string]columnMapping

// lookup resolves the given path to a leaf column; a leafColumn with
// columnIndex -1 is returned when the path does not match.
func (group columnMappingGroup) lookup(path columnPath) leafColumn {
	if len(path) > 0 {
		c, ok := group[path[0]]
		if ok {
			return c.lookup(path[1:])
		}
	}
	return leafColumn{columnIndex: -1}
}

// lookupClosest follows the path as far as it matches groups, then falls back
// to the leaf with the smallest name in the deepest group reached.
func (group columnMappingGroup) lookupClosest(path columnPath) leafColumn {
	for len(path) > 0 {
		g, ok := group[path[0]].(columnMappingGroup)
		if ok {
			group, path = g, path[1:]
		} else {
			// No deeper group to descend into; pick the lexicographically
			// first leaf of the current group, if any.
			firstName := ""
			firstLeaf := (*columnMappingLeaf)(nil)
			for name, child := range group {
				if leaf, ok := child.(*columnMappingLeaf); ok {
					if firstLeaf == nil || name < firstName {
						firstName, firstLeaf = name,
leaf
					}
				}
			}
			if firstLeaf != nil {
				return firstLeaf.column
			}
			break
		}
	}
	return leafColumn{columnIndex: -1}
}

// columnMappingLeaf is a terminal node of the column mapping tree.
type columnMappingLeaf struct {
	column leafColumn
}

// lookup succeeds only when the remaining path is empty, i.e. the full path
// was consumed while reaching this leaf.
func (leaf *columnMappingLeaf) lookup(path columnPath) leafColumn {
	if len(path) == 0 {
		return leaf.column
	}
	return leafColumn{columnIndex: -1}
}

================================================
FILE: column_mapping_test.go
================================================
package parquet_test

import (
	"fmt"
	"strings"

	"github.com/segmentio/parquet-go"
)

func ExampleSchema_Lookup() {
	schema := parquet.SchemaOf(struct {
		FirstName  string `parquet:"first_name"`
		LastName   string `parquet:"last_name"`
		Attributes []struct {
			Name  string `parquet:"name"`
			Value string `parquet:"value"`
		} `parquet:"attributes"`
	}{})

	for _, path := range schema.Columns() {
		leaf, _ := schema.Lookup(path...)
		fmt.Printf("%d => %q\n", leaf.ColumnIndex, strings.Join(path, "."))
	}

	// Output:
	// 0 => "first_name"
	// 1 => "last_name"
	// 2 => "attributes.name"
	// 3 => "attributes.value"
}

================================================
FILE: column_path.go
================================================
package parquet

import (
	"strings"
)

// columnPath is the dotted path of a column within a schema, one element per
// nesting level.
type columnPath []string

// append returns a new path extended with names; the three-index slice forces
// a copy so the receiver's backing array is never mutated.
func (path columnPath) append(names ...string) columnPath {
	return append(path[:len(path):len(path)], names...)
}

func (path columnPath) equal(other columnPath) bool {
	return stringsAreEqual(path, other)
}

func (path columnPath) less(other columnPath) bool {
	return stringsAreOrdered(path, other)
}

func (path columnPath) String() string {
	return strings.Join(path, ".")
}

// stringsAreEqual reports whether the two slices have the same length and
// identical elements.
func stringsAreEqual(strings1, strings2 []string) bool {
	if len(strings1) != len(strings2) {
		return false
	}
	for i := range strings1 {
		if strings1[i] != strings2[i] {
			return false
		}
	}
	return true
}

// stringsAreOrdered reports whether strings1 orders before strings2 with every
// overlapping element STRICTLY less, and strings1 no longer than strings2.
// NOTE(review): this is stricter than lexicographic order — equal leading
// elements make it return false; presumably intentional for column path
// ordering, confirm against the callers of columnPath.less.
func stringsAreOrdered(strings1, strings2 []string) bool {
	n := len(strings1)
	if n > len(strings2) {
		n = len(strings2)
	}
	for i := 0; i < n; i++ {
		if strings1[i] >= strings2[i] {
			return false
		}
	}
	return len(strings1) <= len(strings2)
}

// leafColumn describes a leaf of the schema: its node, full path, repetition
// and definition levels, and its position in the columns of the schema.
type leafColumn struct {
	node               Node
	path               columnPath
	maxRepetitionLevel byte
	maxDefinitionLevel byte
	columnIndex        int16
}

func forEachLeafColumnOf(node Node, do func(leafColumn)) {
	forEachLeafColumn(node, nil, 0, 0, 0, do)
}

// forEachLeafColumn walks the schema depth-first, invoking do for each leaf
// and returning the column index following the last leaf visited.
func forEachLeafColumn(node Node, path columnPath, columnIndex, maxRepetitionLevel, maxDefinitionLevel int, do func(leafColumn)) int {
	// Optional nodes add a definition level; repeated nodes add both a
	// repetition and a definition level.
	switch {
	case node.Optional():
		maxDefinitionLevel++
	case node.Repeated():
		maxRepetitionLevel++
		maxDefinitionLevel++
	}

	if node.Leaf() {
		do(leafColumn{
			node:               node,
			path:               path,
			maxRepetitionLevel: makeRepetitionLevel(maxRepetitionLevel),
			maxDefinitionLevel: makeDefinitionLevel(maxDefinitionLevel),
			columnIndex:        makeColumnIndex(columnIndex),
		})
		return columnIndex + 1
	}

	for _, field := range node.Fields() {
		columnIndex = forEachLeafColumn(
			field,
			path.append(field.Name()),
			columnIndex,
			maxRepetitionLevel,
			maxDefinitionLevel,
			do,
		)
	}
	return columnIndex
}

// lookupColumnPath descends from node following path, returning nil when any
// element does not name a field.
func lookupColumnPath(node Node, path columnPath) Node {
	for node != nil && len(path) > 0 {
		node = fieldByName(node, path[0])
		path = path[1:]
	}
	return node
}

func hasColumnPath(node Node, path columnPath) bool {
	return lookupColumnPath(node, path) != nil
}

================================================
FILE: column_test.go
================================================
package
parquet_test import ( "fmt" "math/rand" "testing" "github.com/google/uuid" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/format" ) func TestColumnPageIndex(t *testing.T) { for _, config := range [...]struct { name string test func(*testing.T, rows) bool }{ { name: "buffer", test: testColumnPageIndexWithBuffer, }, { name: "file", test: testColumnPageIndexWithFile, }, } { t.Run(config.name, func(t *testing.T) { for _, test := range [...]struct { scenario string function func(*testing.T) interface{} }{ { scenario: "boolean", function: func(t *testing.T) interface{} { return func(rows []struct{ Value bool }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "int32", function: func(t *testing.T) interface{} { return func(rows []struct{ Value int32 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "int64", function: func(t *testing.T) interface{} { return func(rows []struct{ Value int64 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "int96", function: func(t *testing.T) interface{} { return func(rows []struct{ Value deprecated.Int96 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "uint32", function: func(t *testing.T) interface{} { return func(rows []struct{ Value uint32 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "uint64", function: func(t *testing.T) interface{} { return func(rows []struct{ Value uint64 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "float32", function: func(t *testing.T) interface{} { return func(rows []struct{ Value float32 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "float64", function: func(t *testing.T) interface{} { return func(rows []struct{ Value float64 }) bool { return config.test(t, makeRows(rows)) } }, }, { scenario: "string", function: func(t *testing.T) interface{} { return func(rows []struct{ Value string }) bool { return 
config.test(t, makeRows(rows))
					}
				},
			},
			{
				scenario: "uuid",
				function: func(t *testing.T) interface{} {
					return func(rows []struct{ Value uuid.UUID }) bool {
						return config.test(t, makeRows(rows))
					}
				},
			},
		} {
			t.Run(test.scenario, func(t *testing.T) {
				if err := quickCheck(test.function(t)); err != nil {
					t.Error(err)
				}
			})
		}
		})
	}
}

// testColumnPageIndexWithBuffer writes the rows to an in-memory buffer and
// validates both the column index and the offset index of the row group.
func testColumnPageIndexWithBuffer(t *testing.T, rows rows) bool {
	if len(rows) > 0 {
		b := parquet.NewBuffer()
		for _, row := range rows {
			b.Write(row)
		}
		if err := checkRowGroupColumnIndex(b); err != nil {
			t.Error(err)
			return false
		}
		if err := checkRowGroupOffsetIndex(b); err != nil {
			t.Error(err)
			return false
		}
	}
	return true
}

// checkRowGroupColumnIndex validates the column index of every column chunk
// of the row group.
func checkRowGroupColumnIndex(rowGroup parquet.RowGroup) error {
	for i, column := range rowGroup.ColumnChunks() {
		if err := checkColumnChunkColumnIndex(column); err != nil {
			return fmt.Errorf("column chunk @i=%d: %w", i, err)
		}
	}
	return nil
}

// checkColumnChunkColumnIndex verifies that the column index of the given
// column chunk agrees with the pages read from it: min/max bounds, null
// counts, null pages, page count, and the declared boundary order.
func checkColumnChunkColumnIndex(columnChunk parquet.ColumnChunk) error {
	columnType := columnChunk.Type()
	columnIndex := columnChunk.ColumnIndex()
	numPages := columnIndex.NumPages()
	pagesRead := 0
	stats := newColumnStats(columnType)

	pages := columnChunk.Pages()
	defer pages.Close()

	err := forEachPage(pages, func(page parquet.Page) error {
		pageMin, pageMax, hasBounds := page.Bounds()
		if !hasBounds {
			return fmt.Errorf("page bounds are missing")
		}
		indexMin := columnIndex.MinValue(pagesRead)
		indexMax := columnIndex.MaxValue(pagesRead)

		// Fixed: this message previously said "max page value mismatch",
		// duplicating the message of the max-bound check below.
		if !parquet.Equal(pageMin, indexMin) {
			return fmt.Errorf("min page value mismatch: index=%q page=%q", indexMin, pageMin)
		}
		if !parquet.Equal(pageMax, indexMax) {
			return fmt.Errorf("max page value mismatch: index=%q page=%q", indexMax, pageMax)
		}

		numNulls := int64(0)
		numValues := int64(0)
		err := forEachValue(page.Values(), func(value parquet.Value) error {
			stats.observe(value)
			if value.IsNull() {
				numNulls++
			}
			numValues++
			return nil
		})
		if err != nil {
			return err
		}

		// Fixed typo: "mimatch" -> "mismatch".
		nullCount := columnIndex.NullCount(pagesRead)
		if numNulls != nullCount {
			return fmt.Errorf("number of null values mismatch: index=%d page=%d", nullCount, numNulls)
		}

		nullPage := columnIndex.NullPage(pagesRead)
		if numNulls > 0 && numNulls == numValues && !nullPage {
			return fmt.Errorf("page only contained null values but the index did not categorize it as a null page: nulls=%d", numNulls)
		}

		stats.pageRead()
		pagesRead++
		return nil
	})
	if err != nil {
		return fmt.Errorf("page @i=%d: %w", pagesRead, err)
	}
	if pagesRead != numPages {
		return fmt.Errorf("number of pages found in column index differs from the number of pages read: index=%d read=%d", numPages, pagesRead)
	}

	// The declared boundary order must match what was observed over the pages;
	// the xor terms detect any disagreement in either direction.
	actualOrder := columnIndexOrder(columnIndex)
	observedOrder := observedIndexOrder(columnType, stats.minValues, stats.maxValues)
	xorAscending := (columnIndex.IsAscending() || observedOrder == ascendingIndexOrder) &&
		!(columnIndex.IsAscending() && observedOrder == ascendingIndexOrder)
	xorDescending := (columnIndex.IsDescending() || observedOrder == descendingIndexOrder) &&
		!(columnIndex.IsDescending() && observedOrder == descendingIndexOrder)
	if xorAscending || xorDescending {
		return fmt.Errorf("column index is declared to be %s while observed values %s (min values %s, max values %s)",
			actualOrder,
			observedOrder,
			valueOrder(columnType, stats.minValues),
			valueOrder(columnType, stats.maxValues),
		)
	}
	return nil
}

// checkRowGroupOffsetIndex validates the offset index of every column chunk
// of the row group.
func checkRowGroupOffsetIndex(rowGroup parquet.RowGroup) error {
	for i, column := range rowGroup.ColumnChunks() {
		if err := checkColumnChunkOffsetIndex(column); err != nil {
			return fmt.Errorf("column chunk @i=%d: %w", i, err)
		}
	}
	return nil
}

func checkColumnChunkOffsetIndex(columnChunk parquet.ColumnChunk) error {
	offsetIndex := columnChunk.OffsetIndex()
	numPages := offsetIndex.NumPages()
	pagesRead := 0
	rowIndex := int64(0)

	pages := columnChunk.Pages()
	defer pages.Close()

	err := forEachPage(pages, func(page parquet.Page) error {
		if firstRowIndex := offsetIndex.FirstRowIndex(pagesRead); firstRowIndex != rowIndex {
			return fmt.Errorf("row number mismatch: index=%d page=%d",
firstRowIndex, rowIndex) } rowIndex += int64(page.NumRows()) pagesRead++ return nil }) if err != nil { return fmt.Errorf("page @i=%d: %w", pagesRead, err) } if pagesRead != numPages { return fmt.Errorf("number of pages found in offset index differs from the number of pages read: index=%d read=%d", numPages, pagesRead) } return nil } func testColumnPageIndexWithFile(t *testing.T, rows rows) bool { if len(rows) > 0 { r := rand.New(rand.NewSource(5)) f, err := createParquetFile(rows, parquet.PageBufferSize(r.Intn(49)+1), parquet.ColumnIndexSizeLimit(4096), ) if err != nil { t.Error(err) return false } if err := checkFileColumnIndex(f); err != nil { t.Error(err) return false } if err := checkFileOffsetIndex(f); err != nil { t.Error(err) return false } for i, rowGroup := range f.RowGroups() { if err := checkRowGroupColumnIndex(rowGroup); err != nil { t.Errorf("checking column index of row group @i=%d: %v", i, err) return false } if err := checkRowGroupOffsetIndex(rowGroup); err != nil { t.Errorf("checking offset index of row group @i=%d: %v", i, err) return false } } } return true } func checkFileColumnIndex(f *parquet.File) error { columnIndexes := f.ColumnIndexes() i := 0 return forEachColumnChunk(f, func(col *parquet.Column, chunk parquet.ColumnChunk) error { columnIndex := chunk.ColumnIndex() if n := columnIndex.NumPages(); n <= 0 { return fmt.Errorf("invalid number of pages found in the column index: %d", n) } if i >= len(columnIndexes) { return fmt.Errorf("more column indexes were read when iterating over column chunks than when reading from the file (i=%d,n=%d)", i, len(columnIndexes)) } index1 := columnIndex index2 := &fileColumnIndex{ kind: col.Type().Kind(), ColumnIndex: columnIndexes[i], } numPages1 := index1.NumPages() numPages2 := index2.NumPages() if numPages1 != numPages2 { return fmt.Errorf("number of pages mismatch: got=%d want=%d", numPages1, numPages2) } for j := 0; j < numPages1; j++ { nullCount1 := index1.NullCount(j) nullCount2 := 
index2.NullCount(j) if nullCount1 != nullCount2 { return fmt.Errorf("null count of page %d/%d mismatch: got=%d want=%d", i, numPages1, nullCount1, nullCount2) } nullPage1 := index1.NullPage(j) nullPage2 := index2.NullPage(j) if nullPage1 != nullPage2 { return fmt.Errorf("null page of page %d/%d mismatch: got=%t want=%t", i, numPages1, nullPage1, nullPage2) } minValue1 := index1.MinValue(j) minValue2 := index2.MinValue(j) if !parquet.Equal(minValue1, minValue2) { return fmt.Errorf("min value of page %d/%d mismatch: got=%v want=%v", i, numPages1, minValue1, minValue2) } maxValue1 := index1.MaxValue(j) maxValue2 := index2.MaxValue(j) if !parquet.Equal(maxValue1, maxValue2) { return fmt.Errorf("max value of page %d/%d mismatch: got=%v want=%v", i, numPages1, maxValue1, maxValue2) } isAscending1 := index1.IsAscending() isAscending2 := index2.IsAscending() if isAscending1 != isAscending2 { return fmt.Errorf("ascending state of page %d/%d mismatch: got=%t want=%t", i, numPages1, isAscending1, isAscending2) } isDescending1 := index1.IsDescending() isDescending2 := index2.IsDescending() if isDescending1 != isDescending2 { return fmt.Errorf("descending state of page %d/%d mismatch: got=%t want=%t", i, numPages1, isDescending1, isDescending2) } } i++ return nil }) } func checkFileOffsetIndex(f *parquet.File) error { offsetIndexes := f.OffsetIndexes() i := 0 return forEachColumnChunk(f, func(col *parquet.Column, chunk parquet.ColumnChunk) error { offsetIndex := chunk.OffsetIndex() if n := offsetIndex.NumPages(); n <= 0 { return fmt.Errorf("invalid number of pages found in the offset index: %d", n) } if i >= len(offsetIndexes) { return fmt.Errorf("more offset indexes were read when iterating over column chunks than when reading from the file (i=%d,n=%d)", i, len(offsetIndexes)) } index1 := offsetIndex index2 := (*fileOffsetIndex)(&offsetIndexes[i]) numPages1 := index1.NumPages() numPages2 := index2.NumPages() if numPages1 != numPages2 { return fmt.Errorf("number of pages 
mismatch: got=%d want=%d", numPages1, numPages2) } for j := 0; j < numPages1; j++ { offset1 := index1.Offset(j) offset2 := index2.Offset(j) if offset1 != offset2 { return fmt.Errorf("offsets of page %d/%d mismatch: got=%d want=%d", i, numPages1, offset1, offset2) } compressedPageSize1 := index1.CompressedPageSize(j) compressedPageSize2 := index2.CompressedPageSize(j) if compressedPageSize1 != compressedPageSize2 { return fmt.Errorf("compressed page size of page %d/%d mismatch: got=%d want=%d", i, numPages1, compressedPageSize1, compressedPageSize2) } firstRowIndex1 := index1.FirstRowIndex(j) firstRowIndex2 := index2.FirstRowIndex(j) if firstRowIndex1 != firstRowIndex2 { return fmt.Errorf("first row index of page %d/%d mismatch: got=%d want=%d", i, numPages1, firstRowIndex1, firstRowIndex2) } } i++ return nil }) } type fileColumnIndex struct { kind parquet.Kind format.ColumnIndex } func (i *fileColumnIndex) NumPages() int { return len(i.NullPages) } func (i *fileColumnIndex) NullCount(j int) int64 { return i.NullCounts[j] } func (i *fileColumnIndex) NullPage(j int) bool { return i.NullPages[j] } func (i *fileColumnIndex) MinValue(j int) parquet.Value { return i.kind.Value(i.MinValues[j]) } func (i *fileColumnIndex) MaxValue(j int) parquet.Value { return i.kind.Value(i.MaxValues[j]) } func (i *fileColumnIndex) IsAscending() bool { return i.BoundaryOrder == format.Ascending } func (i *fileColumnIndex) IsDescending() bool { return i.BoundaryOrder == format.Descending } type fileOffsetIndex format.OffsetIndex func (i *fileOffsetIndex) NumPages() int { return len(i.PageLocations) } func (i *fileOffsetIndex) Offset(j int) int64 { return i.PageLocations[j].Offset } func (i *fileOffsetIndex) CompressedPageSize(j int) int64 { return int64(i.PageLocations[j].CompressedPageSize) } func (i *fileOffsetIndex) FirstRowIndex(j int) int64 { return i.PageLocations[j].FirstRowIndex } type columnStats struct { page int columnType parquet.Type minValues []parquet.Value maxValues 
[]parquet.Value } func newColumnStats(columnType parquet.Type) *columnStats { return &columnStats{columnType: columnType} } func (c *columnStats) observe(value parquet.Value) { if c.page >= len(c.minValues) { c.minValues = append(c.minValues, value.Clone()) } else if c.columnType.Compare(c.minValues[c.page], value) > 0 { c.minValues[c.page] = value.Clone() } if c.page >= len(c.maxValues) { c.maxValues = append(c.maxValues, value.Clone()) } else if c.columnType.Compare(c.maxValues[c.page], value) < 0 { c.maxValues[c.page] = value.Clone() } } func (c *columnStats) pageRead() { c.page++ } type indexOrder int const ( invalidIndexOrder indexOrder = iota unorderedIndexOrder ascendingIndexOrder descendingIndexOrder ) func (o indexOrder) String() string { switch o { case unorderedIndexOrder: return "unordered" case ascendingIndexOrder: return "ascending" case descendingIndexOrder: return "descending" default: return "invalid" } } func columnIndexOrder(index parquet.ColumnIndex) indexOrder { switch { case index.IsAscending() && index.IsDescending(): return invalidIndexOrder case index.IsAscending(): return ascendingIndexOrder case index.IsDescending(): return descendingIndexOrder default: return unorderedIndexOrder } } func observedIndexOrder(columnType parquet.Type, minValues []parquet.Value, maxValues []parquet.Value) indexOrder { a := valueOrder(columnType, minValues) b := valueOrder(columnType, maxValues) switch { case a == ascendingIndexOrder && b == ascendingIndexOrder: return ascendingIndexOrder case a == descendingIndexOrder && b == descendingIndexOrder: return descendingIndexOrder default: return unorderedIndexOrder } } func valueOrder(columnType parquet.Type, values []parquet.Value) indexOrder { switch len(values) { case 0, 1: return unorderedIndexOrder } var order int for i := 1; i < len(values); i++ { next := columnType.Compare(values[i-1], values[i]) if next == 0 { continue } if order == 0 { order = next continue } if order != next { return unorderedIndexOrder 
} } if order > 0 { return descendingIndexOrder } return ascendingIndexOrder } ================================================ FILE: compare.go ================================================ package parquet import ( "encoding/binary" "github.com/segmentio/parquet-go/deprecated" ) // CompareDescending constructs a comparison function which inverses the order // of values. // //go:noinline func CompareDescending(cmp func(Value, Value) int) func(Value, Value) int { return func(a, b Value) int { return -cmp(a, b) } } // CompareNullsFirst constructs a comparison function which assumes that null // values are smaller than all other values. // //go:noinline func CompareNullsFirst(cmp func(Value, Value) int) func(Value, Value) int { return func(a, b Value) int { switch { case a.IsNull(): if b.IsNull() { return 0 } return -1 case b.IsNull(): return +1 default: return cmp(a, b) } } } // CompareNullsLast constructs a comparison function which assumes that null // values are greater than all other values. 
// //go:noinline func CompareNullsLast(cmp func(Value, Value) int) func(Value, Value) int { return func(a, b Value) int { switch { case a.IsNull(): if b.IsNull() { return 0 } return +1 case b.IsNull(): return -1 default: return cmp(a, b) } } } func compareBool(v1, v2 bool) int { switch { case !v1 && v2: return -1 case v1 && !v2: return +1 default: return 0 } } func compareInt32(v1, v2 int32) int { switch { case v1 < v2: return -1 case v1 > v2: return +1 default: return 0 } } func compareInt64(v1, v2 int64) int { switch { case v1 < v2: return -1 case v1 > v2: return +1 default: return 0 } } func compareInt96(v1, v2 deprecated.Int96) int { switch { case v1.Less(v2): return -1 case v2.Less(v1): return +1 default: return 0 } } func compareFloat32(v1, v2 float32) int { switch { case v1 < v2: return -1 case v1 > v2: return +1 default: return 0 } } func compareFloat64(v1, v2 float64) int { switch { case v1 < v2: return -1 case v1 > v2: return +1 default: return 0 } } func compareUint32(v1, v2 uint32) int { switch { case v1 < v2: return -1 case v1 > v2: return +1 default: return 0 } } func compareUint64(v1, v2 uint64) int { switch { case v1 < v2: return -1 case v1 > v2: return +1 default: return 0 } } func compareBE128(v1, v2 *[16]byte) int { x := binary.BigEndian.Uint64(v1[:8]) y := binary.BigEndian.Uint64(v2[:8]) switch { case x < y: return -1 case x > y: return +1 } x = binary.BigEndian.Uint64(v1[8:]) y = binary.BigEndian.Uint64(v2[8:]) switch { case x < y: return -1 case x > y: return +1 default: return 0 } } func lessBE128(v1, v2 *[16]byte) bool { x := binary.BigEndian.Uint64(v1[:8]) y := binary.BigEndian.Uint64(v2[:8]) switch { case x < y: return true case x > y: return false } x = binary.BigEndian.Uint64(v1[8:]) y = binary.BigEndian.Uint64(v2[8:]) return x < y } func compareRowsFuncOf(schema *Schema, sortingColumns []SortingColumn) func(Row, Row) int { leafColumns := make([]leafColumn, len(sortingColumns)) canCompareRows := true forEachLeafColumnOf(schema, func(leaf 
leafColumn) { if leaf.maxRepetitionLevel > 0 { canCompareRows = false } if sortingIndex := searchSortingColumn(sortingColumns, leaf.path); sortingIndex < len(sortingColumns) { leafColumns[sortingIndex] = leaf if leaf.maxDefinitionLevel > 0 { canCompareRows = false } } }) // This is an optimization for the common case where rows // are sorted by non-optional, non-repeated columns. // // The sort function can make the assumption that it will // find the column value at the current column index, and // does not need to scan the rows looking for values with // a matching column index. if canCompareRows { return compareRowsFuncOfColumnIndexes(leafColumns, sortingColumns) } return compareRowsFuncOfColumnValues(leafColumns, sortingColumns) } func compareRowsUnordered(Row, Row) int { return 0 } //go:noinline func compareRowsFuncOfIndexColumns(compareFuncs []func(Row, Row) int) func(Row, Row) int { return func(row1, row2 Row) int { for _, compare := range compareFuncs { if cmp := compare(row1, row2); cmp != 0 { return cmp } } return 0 } } //go:noinline func compareRowsFuncOfIndexAscending(columnIndex int16, typ Type) func(Row, Row) int { return func(row1, row2 Row) int { return typ.Compare(row1[columnIndex], row2[columnIndex]) } } //go:noinline func compareRowsFuncOfIndexDescending(columnIndex int16, typ Type) func(Row, Row) int { return func(row1, row2 Row) int { return -typ.Compare(row1[columnIndex], row2[columnIndex]) } } //go:noinline func compareRowsFuncOfColumnIndexes(leafColumns []leafColumn, sortingColumns []SortingColumn) func(Row, Row) int { compareFuncs := make([]func(Row, Row) int, len(sortingColumns)) for sortingIndex, sortingColumn := range sortingColumns { leaf := leafColumns[sortingIndex] typ := leaf.node.Type() if sortingColumn.Descending() { compareFuncs[sortingIndex] = compareRowsFuncOfIndexDescending(leaf.columnIndex, typ) } else { compareFuncs[sortingIndex] = compareRowsFuncOfIndexAscending(leaf.columnIndex, typ) } } switch len(compareFuncs) { case 0: 
return compareRowsUnordered case 1: return compareFuncs[0] default: return compareRowsFuncOfIndexColumns(compareFuncs) } } //go:noinline func compareRowsFuncOfColumnValues(leafColumns []leafColumn, sortingColumns []SortingColumn) func(Row, Row) int { highestColumnIndex := int16(0) columnIndexes := make([]int16, len(sortingColumns)) compareFuncs := make([]func(Value, Value) int, len(sortingColumns)) for sortingIndex, sortingColumn := range sortingColumns { leaf := leafColumns[sortingIndex] compare := leaf.node.Type().Compare if sortingColumn.Descending() { compare = CompareDescending(compare) } if leaf.maxDefinitionLevel > 0 { if sortingColumn.NullsFirst() { compare = CompareNullsFirst(compare) } else { compare = CompareNullsLast(compare) } } columnIndexes[sortingIndex] = leaf.columnIndex compareFuncs[sortingIndex] = compare if leaf.columnIndex > highestColumnIndex { highestColumnIndex = leaf.columnIndex } } return func(row1, row2 Row) int { columns1 := make([][2]int32, 0, 64) columns2 := make([][2]int32, 0, 64) i1 := 0 i2 := 0 for columnIndex := int16(0); columnIndex <= highestColumnIndex; columnIndex++ { j1 := i1 + 1 j2 := i2 + 1 for j1 < len(row1) && row1[j1].columnIndex == ^columnIndex { j1++ } for j2 < len(row2) && row2[j2].columnIndex == ^columnIndex { j2++ } columns1 = append(columns1, [2]int32{int32(i1), int32(j1)}) columns2 = append(columns2, [2]int32{int32(i2), int32(j2)}) i1 = j1 i2 = j2 } for i, compare := range compareFuncs { columnIndex := columnIndexes[i] offsets1 := columns1[columnIndex] offsets2 := columns2[columnIndex] values1 := row1[offsets1[0]:offsets1[1]:offsets1[1]] values2 := row2[offsets2[0]:offsets2[1]:offsets2[1]] i1 := 0 i2 := 0 for i1 < len(values1) && i2 < len(values2) { if cmp := compare(values1[i1], values2[i2]); cmp != 0 { return cmp } i1++ i2++ } if i1 < len(values1) { return +1 } if i2 < len(values2) { return -1 } } return 0 } } ================================================ FILE: compare_test.go 
================================================ package parquet import "testing" func assertCompare(t *testing.T, a, b Value, cmp func(Value, Value) int, want int) { if got := cmp(a, b); got != want { t.Errorf("compare(%v, %v): got=%d want=%d", a, b, got, want) } } func TestCompareNullsFirst(t *testing.T) { cmp := CompareNullsFirst(Int32Type.Compare) assertCompare(t, Value{}, Value{}, cmp, 0) assertCompare(t, Value{}, ValueOf(int32(0)), cmp, -1) assertCompare(t, ValueOf(int32(0)), Value{}, cmp, +1) assertCompare(t, ValueOf(int32(0)), ValueOf(int32(1)), cmp, -1) } func TestCompareNullsLast(t *testing.T) { cmp := CompareNullsLast(Int32Type.Compare) assertCompare(t, Value{}, Value{}, cmp, 0) assertCompare(t, Value{}, ValueOf(int32(0)), cmp, +1) assertCompare(t, ValueOf(int32(0)), Value{}, cmp, -1) assertCompare(t, ValueOf(int32(0)), ValueOf(int32(1)), cmp, -1) } func BenchmarkCompareBE128(b *testing.B) { v1 := [16]byte{} v2 := [16]byte{} for i := 0; i < b.N; i++ { compareBE128(&v1, &v2) } } func BenchmarkLessBE128(b *testing.B) { v1 := [16]byte{} v2 := [16]byte{} for i := 0; i < b.N; i++ { lessBE128(&v1, &v2) } } ================================================ FILE: compress/brotli/brotli.go ================================================ // Package brotli implements the BROTLI parquet compression codec. package brotli import ( "io" "github.com/andybalholm/brotli" "github.com/segmentio/parquet-go/compress" "github.com/segmentio/parquet-go/format" ) const ( DefaultQuality = 0 DefaultLGWin = 0 ) type Codec struct { // Quality controls the compression-speed vs compression-density trade-offs. // The higher the quality, the slower the compression. Range is 0 to 11. Quality int // LGWin is the base 2 logarithm of the sliding window size. // Range is 10 to 24. 0 indicates automatic configuration based on Quality. 
LGWin int r compress.Decompressor w compress.Compressor } func (c *Codec) String() string { return "BROTLI" } func (c *Codec) CompressionCodec() format.CompressionCodec { return format.Brotli } func (c *Codec) Encode(dst, src []byte) ([]byte, error) { return c.w.Encode(dst, src, func(w io.Writer) (compress.Writer, error) { return brotli.NewWriterOptions(w, brotli.WriterOptions{ Quality: c.Quality, LGWin: c.LGWin, }), nil }) } func (c *Codec) Decode(dst, src []byte) ([]byte, error) { return c.r.Decode(dst, src, func(r io.Reader) (compress.Reader, error) { return reader{brotli.NewReader(r)}, nil }) } type reader struct{ *brotli.Reader } func (reader) Close() error { return nil } ================================================ FILE: compress/compress.go ================================================ // Package compress provides the generic APIs implemented by parquet compression // codecs. // // https://github.com/apache/parquet-format/blob/master/Compression.md package compress import ( "bytes" "io" "sync" "github.com/segmentio/parquet-go/format" ) // The Codec interface represents parquet compression codecs implemented by the // compress sub-packages. // // Codec instances must be safe to use concurrently from multiple goroutines. type Codec interface { // Returns a human-readable name for the codec. String() string // Returns the code of the compression codec in the parquet format. CompressionCodec() format.CompressionCodec // Writes the compressed version of src to dst and returns it. // // The method automatically reallocates the output buffer if its capacity // was too small to hold the compressed data. Encode(dst, src []byte) ([]byte, error) // Writes the uncompressed version of src to dst and returns it. // // The method automatically reallocates the output buffer if its capacity // was too small to hold the uncompressed data. 
Decode(dst, src []byte) ([]byte, error) } type Reader interface { io.ReadCloser Reset(io.Reader) error } type Writer interface { io.WriteCloser Reset(io.Writer) } type Compressor struct { writers sync.Pool // *writer } type writer struct { output bytes.Buffer writer Writer } func (c *Compressor) Encode(dst, src []byte, newWriter func(io.Writer) (Writer, error)) ([]byte, error) { w, _ := c.writers.Get().(*writer) if w != nil { w.output = *bytes.NewBuffer(dst[:0]) w.writer.Reset(&w.output) } else { w = new(writer) w.output = *bytes.NewBuffer(dst[:0]) var err error if w.writer, err = newWriter(&w.output); err != nil { return dst, err } } defer func() { w.output = *bytes.NewBuffer(nil) w.writer.Reset(io.Discard) c.writers.Put(w) }() if _, err := w.writer.Write(src); err != nil { return w.output.Bytes(), err } if err := w.writer.Close(); err != nil { return w.output.Bytes(), err } return w.output.Bytes(), nil } type Decompressor struct { readers sync.Pool // *reader } type reader struct { input bytes.Reader reader Reader } func (d *Decompressor) Decode(dst, src []byte, newReader func(io.Reader) (Reader, error)) ([]byte, error) { r, _ := d.readers.Get().(*reader) if r != nil { r.input.Reset(src) if err := r.reader.Reset(&r.input); err != nil { return dst, err } } else { r = new(reader) r.input.Reset(src) var err error if r.reader, err = newReader(&r.input); err != nil { return dst, err } } defer func() { r.input.Reset(nil) if err := r.reader.Reset(nil); err == nil { d.readers.Put(r) } }() if cap(dst) == 0 { dst = make([]byte, 0, 2*len(src)) } else { dst = dst[:0] } for { n, err := r.reader.Read(dst[len(dst):cap(dst)]) dst = dst[:len(dst)+n] if err != nil { if err == io.EOF { err = nil } return dst, err } if len(dst) == cap(dst) { tmp := make([]byte, len(dst), 2*len(dst)) copy(tmp, dst) dst = tmp } } } ================================================ FILE: compress/compress_test.go ================================================ package compress_test import ( "bytes" 
"io" "testing" "github.com/segmentio/parquet-go/compress" "github.com/segmentio/parquet-go/compress/brotli" "github.com/segmentio/parquet-go/compress/gzip" "github.com/segmentio/parquet-go/compress/lz4" "github.com/segmentio/parquet-go/compress/snappy" "github.com/segmentio/parquet-go/compress/uncompressed" "github.com/segmentio/parquet-go/compress/zstd" ) var tests = [...]struct { scenario string codec compress.Codec }{ { scenario: "uncompressed", codec: new(uncompressed.Codec), }, { scenario: "snappy", codec: new(snappy.Codec), }, { scenario: "gzip", codec: new(gzip.Codec), }, { scenario: "brotli", codec: new(brotli.Codec), }, { scenario: "zstd", codec: new(zstd.Codec), }, { scenario: "lz4", codec: new(lz4.Codec), }, } var testdata = bytes.Repeat([]byte("1234567890qwertyuiopasdfghjklzxcvbnm"), 10e3) func TestCompressionCodec(t *testing.T) { buffer := make([]byte, 0, len(testdata)) output := make([]byte, 0, len(testdata)) for _, test := range tests { t.Run(test.scenario, func(t *testing.T) { const N = 10 // Run the test multiple times to exercise codecs that maintain // state across compression/decompression. 
for i := 0; i < N; i++ { var err error buffer, err = test.codec.Encode(buffer[:0], testdata) if err != nil { t.Fatal(err) } output, err = test.codec.Decode(output[:0], buffer) if err != nil { t.Fatal(err) } if !bytes.Equal(testdata, output) { t.Errorf("content mismatch after compressing and decompressing (attempt %d/%d)", i+1, N) } } }) } } func BenchmarkEncode(b *testing.B) { buffer := make([]byte, 0, len(testdata)) for _, test := range tests { b.Run(test.scenario, func(b *testing.B) { b.SetBytes(int64(len(testdata))) benchmarkZeroAllocsPerRun(b, func() { buffer, _ = test.codec.Encode(buffer[:0], testdata) }) }) } } func BenchmarkDecode(b *testing.B) { buffer := make([]byte, 0, len(testdata)) output := make([]byte, 0, len(testdata)) for _, test := range tests { b.Run(test.scenario, func(b *testing.B) { buffer, _ = test.codec.Encode(buffer[:0], testdata) b.SetBytes(int64(len(testdata))) benchmarkZeroAllocsPerRun(b, func() { output, _ = test.codec.Encode(output[:0], buffer) }) }) } } type simpleReader struct{ io.Reader } func (s *simpleReader) Close() error { return nil } func (s *simpleReader) Reset(r io.Reader) error { s.Reader = r; return nil } type simpleWriter struct{ io.Writer } func (s *simpleWriter) Close() error { return nil } func (s *simpleWriter) Reset(w io.Writer) { s.Writer = w } func BenchmarkCompressor(b *testing.B) { compressor := compress.Compressor{} src := make([]byte, 1000) dst := make([]byte, 1000) benchmarkZeroAllocsPerRun(b, func() { dst, _ = compressor.Encode(dst, src, func(w io.Writer) (compress.Writer, error) { return &simpleWriter{Writer: w}, nil }) }) } func BenchmarkDecompressor(b *testing.B) { decompressor := compress.Decompressor{} src := make([]byte, 1000) dst := make([]byte, 1000) benchmarkZeroAllocsPerRun(b, func() { dst, _ = decompressor.Decode(dst, src, func(r io.Reader) (compress.Reader, error) { return &simpleReader{Reader: r}, nil }) }) } func benchmarkZeroAllocsPerRun(b *testing.B, f func()) { if allocs := 
testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() { b.Errorf("too many memory allocations: %g > 0", allocs) } } ================================================ FILE: compress/gzip/gzip.go ================================================ // Package gzip implements the GZIP parquet compression codec. package gzip import ( "io" "strings" "github.com/klauspost/compress/gzip" "github.com/segmentio/parquet-go/compress" "github.com/segmentio/parquet-go/format" ) const ( emptyGzip = "\x1f\x8b\b\x00\x00\x00\x00\x00\x02\xff\x01\x00\x00\xff\xff\x00\x00\x00\x00\x00\x00\x00\x00" ) const ( NoCompression = gzip.NoCompression BestSpeed = gzip.BestSpeed BestCompression = gzip.BestCompression DefaultCompression = gzip.DefaultCompression HuffmanOnly = gzip.HuffmanOnly ) type Codec struct { Level int r compress.Decompressor w compress.Compressor } func (c *Codec) String() string { return "GZIP" } func (c *Codec) CompressionCodec() format.CompressionCodec { return format.Gzip } func (c *Codec) Encode(dst, src []byte) ([]byte, error) { return c.w.Encode(dst, src, func(w io.Writer) (compress.Writer, error) { return gzip.NewWriterLevel(w, c.Level) }) } func (c *Codec) Decode(dst, src []byte) ([]byte, error) { return c.r.Decode(dst, src, func(r io.Reader) (compress.Reader, error) { z, err := gzip.NewReader(r) if err != nil { return nil, err } return &reader{Reader: z}, nil }) } type reader struct { *gzip.Reader emptyGzip strings.Reader } func (r *reader) Reset(rr io.Reader) error { if rr == nil { r.emptyGzip.Reset(emptyGzip) rr = &r.emptyGzip } return r.Reader.Reset(rr) } ================================================ FILE: compress/lz4/lz4.go ================================================ // Package lz4 implements the LZ4_RAW parquet compression codec. 
package lz4 import ( "github.com/pierrec/lz4/v4" "github.com/segmentio/parquet-go/format" ) type Level = lz4.CompressionLevel const ( Fast = lz4.Fast Level1 = lz4.Level1 Level2 = lz4.Level2 Level3 = lz4.Level3 Level4 = lz4.Level4 Level5 = lz4.Level5 Level6 = lz4.Level6 Level7 = lz4.Level7 Level8 = lz4.Level8 Level9 = lz4.Level9 ) const ( DefaultLevel = Fast ) type Codec struct { Level Level } func (c *Codec) String() string { return "LZ4_RAW" } func (c *Codec) CompressionCodec() format.CompressionCodec { return format.Lz4Raw } func (c *Codec) Encode(dst, src []byte) ([]byte, error) { dst = reserveAtLeast(dst, len(src)/4) compressor := lz4.CompressorHC{Level: c.Level} for { n, err := compressor.CompressBlock(src, dst) if err != nil { // see Decode for details about error handling dst = make([]byte, 2*len(dst)) } else if n == 0 { dst = reserveAtLeast(dst, lz4.CompressBlockBound(len(src))) } else { return dst[:n], nil } } } func (c *Codec) Decode(dst, src []byte) ([]byte, error) { // 3x seems like a common compression ratio, so we optimistically size the // output buffer to that size. Feel free to change the value if you observe // different behaviors. dst = reserveAtLeast(dst, 3*len(src)) for { n, err := lz4.UncompressBlock(src, dst) // The lz4 package does not expose the error values, they are declared // in internal/lz4errors. Based on what I read of the implementation, // the only condition where this function errors is if the output buffer // was too short. // // https://github.com/pierrec/lz4/blob/a5532e5996ee86d17f8ce2694c08fb5bf3c6b471/internal/lz4block/block.go#L45-L53 if err != nil { dst = make([]byte, 2*len(dst)) } else { return dst[:n], nil } } } func reserveAtLeast(b []byte, n int) []byte { if cap(b) < n { b = make([]byte, n) } else { b = b[:cap(b)] } return b } ================================================ FILE: compress/snappy/snappy.go ================================================ // Package snappy implements the SNAPPY parquet compression codec. 
package snappy import ( "github.com/klauspost/compress/snappy" "github.com/segmentio/parquet-go/format" ) type Codec struct { } // The snappy.Reader and snappy.Writer implement snappy encoding/decoding with // a framing protocol, but snappy requires the implementation to use the raw // snappy block encoding. This is why we need to use snappy.Encode/snappy.Decode // and have to ship custom implementations of the compressed reader and writer. func (c *Codec) String() string { return "SNAPPY" } func (c *Codec) CompressionCodec() format.CompressionCodec { return format.Snappy } func (c *Codec) Encode(dst, src []byte) ([]byte, error) { return snappy.Encode(dst, src), nil } func (c *Codec) Decode(dst, src []byte) ([]byte, error) { return snappy.Decode(dst, src) } ================================================ FILE: compress/uncompressed/uncompressed.go ================================================ // Package uncompressed provides implementations of the compression codec // interfaces as pass-through without applying any compression nor // decompression. package uncompressed import ( "github.com/segmentio/parquet-go/format" ) type Codec struct { } func (c *Codec) String() string { return "UNCOMPRESSED" } func (c *Codec) CompressionCodec() format.CompressionCodec { return format.Uncompressed } func (c *Codec) Encode(dst, src []byte) ([]byte, error) { return append(dst[:0], src...), nil } func (c *Codec) Decode(dst, src []byte) ([]byte, error) { return append(dst[:0], src...), nil } ================================================ FILE: compress/zstd/zstd.go ================================================ // Package zstd implements the ZSTD parquet compression codec. package zstd import ( "sync" "github.com/klauspost/compress/zstd" "github.com/segmentio/parquet-go/format" ) type Level = zstd.EncoderLevel const ( // SpeedFastest will choose the fastest reasonable compression. // This is roughly equivalent to the fastest Zstandard mode. 
SpeedFastest = zstd.SpeedFastest // SpeedDefault is the default "pretty fast" compression option. // This is roughly equivalent to the default Zstandard mode (level 3). SpeedDefault = zstd.SpeedDefault // SpeedBetterCompression will yield better compression than the default. // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. // By using this, notice that CPU usage may go up in the future. SpeedBetterCompression = zstd.SpeedBetterCompression // SpeedBestCompression will choose the best available compression option. // This will offer the best compression no matter the CPU cost. SpeedBestCompression = zstd.SpeedBestCompression ) const ( DefaultLevel = SpeedDefault ) type Codec struct { Level Level encoders sync.Pool // *zstd.Encoder decoders sync.Pool // *zstd.Decoder } func (c *Codec) String() string { return "ZSTD" } func (c *Codec) CompressionCodec() format.CompressionCodec { return format.Zstd } func (c *Codec) Encode(dst, src []byte) ([]byte, error) { e, _ := c.encoders.Get().(*zstd.Encoder) if e == nil { var err error e, err = zstd.NewWriter(nil, zstd.WithEncoderConcurrency(1), zstd.WithEncoderLevel(c.level()), zstd.WithZeroFrames(true), zstd.WithEncoderCRC(false), ) if err != nil { return dst[:0], err } } defer c.encoders.Put(e) return e.EncodeAll(src, dst[:0]), nil } func (c *Codec) Decode(dst, src []byte) ([]byte, error) { d, _ := c.decoders.Get().(*zstd.Decoder) if d == nil { var err error d, err = zstd.NewReader(nil, zstd.WithDecoderConcurrency(1), ) if err != nil { return dst[:0], err } } defer c.decoders.Put(d) return d.DecodeAll(src, dst[:0]) } func (c *Codec) level() Level { if c.Level != 0 { return c.Level } return DefaultLevel } ================================================ FILE: compress.go ================================================ package parquet import ( "fmt" "github.com/segmentio/parquet-go/compress" "github.com/segmentio/parquet-go/compress/brotli" "github.com/segmentio/parquet-go/compress/gzip" 
"github.com/segmentio/parquet-go/compress/lz4" "github.com/segmentio/parquet-go/compress/snappy" "github.com/segmentio/parquet-go/compress/uncompressed" "github.com/segmentio/parquet-go/compress/zstd" "github.com/segmentio/parquet-go/format" ) var ( // Uncompressed is a parquet compression codec representing uncompressed // pages. Uncompressed uncompressed.Codec // Snappy is the SNAPPY parquet compression codec. Snappy snappy.Codec // Gzip is the GZIP parquet compression codec. Gzip = gzip.Codec{ Level: gzip.DefaultCompression, } // Brotli is the BROTLI parquet compression codec. Brotli = brotli.Codec{ Quality: brotli.DefaultQuality, LGWin: brotli.DefaultLGWin, } // Zstd is the ZSTD parquet compression codec. Zstd = zstd.Codec{ Level: zstd.DefaultLevel, } // Lz4Raw is the LZ4_RAW parquet compression codec. Lz4Raw = lz4.Codec{ Level: lz4.DefaultLevel, } // Table of compression codecs indexed by their code in the parquet format. compressionCodecs = [...]compress.Codec{ format.Uncompressed: &Uncompressed, format.Snappy: &Snappy, format.Gzip: &Gzip, format.Brotli: &Brotli, format.Zstd: &Zstd, format.Lz4Raw: &Lz4Raw, } ) // LookupCompressionCodec returns the compression codec associated with the // given code. // // The function never returns nil. If the encoding is not supported, // an "unsupported" codec is returned. 
func LookupCompressionCodec(codec format.CompressionCodec) compress.Codec { if codec >= 0 && int(codec) < len(compressionCodecs) { if c := compressionCodecs[codec]; c != nil { return c } } return &unsupported{codec} } type unsupported struct { codec format.CompressionCodec } func (u *unsupported) String() string { return "UNSUPPORTED" } func (u *unsupported) CompressionCodec() format.CompressionCodec { return u.codec } func (u *unsupported) Encode(dst, src []byte) ([]byte, error) { return dst[:0], u.error() } func (u *unsupported) Decode(dst, src []byte) ([]byte, error) { return dst[:0], u.error() } func (u *unsupported) error() error { return fmt.Errorf("unsupported compression codec: %s", u.codec) } func isCompressed(c compress.Codec) bool { return c != nil && c.CompressionCodec() != format.Uncompressed } ================================================ FILE: config.go ================================================ package parquet import ( "fmt" "math" "runtime/debug" "strings" "sync" "github.com/segmentio/parquet-go/compress" ) // ReadMode is an enum that is used to configure the way that a File reads pages. type ReadMode int const ( ReadModeSync ReadMode = iota // ReadModeSync reads pages synchronously on demand (Default). ReadModeAsync // ReadModeAsync reads pages asynchronously in the background. 
)

// Default values for the configuration knobs declared in this file.
const (
	DefaultColumnIndexSizeLimit = 16
	DefaultColumnBufferCapacity = 16 * 1024
	DefaultPageBufferSize       = 256 * 1024
	DefaultWriteBufferSize      = 32 * 1024
	DefaultDataPageVersion      = 2
	DefaultDataPageStatistics   = false
	DefaultSkipPageIndex        = false
	DefaultSkipBloomFilters     = false
	DefaultMaxRowsPerRowGroup   = math.MaxInt64
	DefaultReadMode             = ReadModeSync
)

const (
	parquetGoModulePath = "github.com/segmentio/parquet-go"
)

var (
	// defaultCreatedByInfo caches the string computed once by
	// defaultCreatedBy.
	defaultCreatedByInfo string
	defaultCreatedByOnce sync.Once
)

// defaultCreatedBy computes (once) the default "CreatedBy" file metadata:
// the parquet-go module path, extended with the module version and build sha
// when they can be recovered from the binary's build info.
func defaultCreatedBy() string {
	defaultCreatedByOnce.Do(func() {
		createdBy := parquetGoModulePath
		build, ok := debug.ReadBuildInfo()
		if ok {
			for _, mod := range build.Deps {
				// Only trust the version when the module has not been
				// swapped out by a replace directive.
				if mod.Replace == nil && mod.Path == parquetGoModulePath {
					semver, _, buildsha := parseModuleVersion(mod.Version)
					createdBy = formatCreatedBy(createdBy, semver, buildsha)
					break
				}
			}
		}
		defaultCreatedByInfo = createdBy
	})
	return defaultCreatedByInfo
}

// parseModuleVersion splits a Go module pseudo-version of the form
// "vX.Y.Z-<datetime>-<sha>" into its three dash-separated components,
// stripping the leading "v" from the semantic version. Missing components
// are returned as empty strings.
func parseModuleVersion(version string) (semver, datetime, buildsha string) {
	semver, version = splitModuleVersion(version)
	datetime, version = splitModuleVersion(version)
	buildsha, _ = splitModuleVersion(version)
	semver = strings.TrimPrefix(semver, "v")
	return
}

// splitModuleVersion cuts s around the first '-', returning the part before
// it and the remainder; tail is empty when s contains no '-'.
func splitModuleVersion(s string) (head, tail string) {
	if i := strings.IndexByte(s, '-'); i < 0 {
		head = s
	} else {
		head, tail = s[:i], s[i+1:]
	}
	return
}

// formatCreatedBy renders the parquet "CreatedBy" convention:
// "<application> version <version>(build <build>)".
//
// NOTE(review): there is no space before "(build" — files have already been
// written with this exact format and readers may parse the string, so it is
// preserved as-is; confirm compatibility before changing it.
func formatCreatedBy(application, version, build string) string {
	return application + " version " + version + "(build " + build + ")"
}

// The FileConfig type carries configuration options for parquet files.
// // FileConfig implements the FileOption interface so it can be used directly // as argument to the OpenFile function when needed, for example: // // f, err := parquet.OpenFile(reader, size, &parquet.FileConfig{ // SkipPageIndex: true, // SkipBloomFilters: true, // ReadMode: ReadModeAsync, // }) type FileConfig struct { SkipPageIndex bool SkipBloomFilters bool ReadBufferSize int ReadMode ReadMode Schema *Schema } // DefaultFileConfig returns a new FileConfig value initialized with the // default file configuration. func DefaultFileConfig() *FileConfig { return &FileConfig{ SkipPageIndex: DefaultSkipPageIndex, SkipBloomFilters: DefaultSkipBloomFilters, ReadBufferSize: defaultReadBufferSize, ReadMode: DefaultReadMode, Schema: nil, } } // NewFileConfig constructs a new file configuration applying the options passed // as arguments. // // The function returns an non-nil error if some of the options carried invalid // configuration values. func NewFileConfig(options ...FileOption) (*FileConfig, error) { config := DefaultFileConfig() config.Apply(options...) return config, config.Validate() } // Apply applies the given list of options to c. func (c *FileConfig) Apply(options ...FileOption) { for _, opt := range options { opt.ConfigureFile(c) } } // ConfigureFile applies configuration options from c to config. func (c *FileConfig) ConfigureFile(config *FileConfig) { *config = FileConfig{ SkipPageIndex: c.SkipPageIndex, SkipBloomFilters: c.SkipBloomFilters, ReadBufferSize: coalesceInt(c.ReadBufferSize, config.ReadBufferSize), ReadMode: ReadMode(coalesceInt(int(c.ReadMode), int(config.ReadMode))), Schema: coalesceSchema(c.Schema, config.Schema), } } // Validate returns a non-nil error if the configuration of c is invalid. func (c *FileConfig) Validate() error { return nil } // The ReaderConfig type carries configuration options for parquet readers. 
// // ReaderConfig implements the ReaderOption interface so it can be used directly // as argument to the NewReader function when needed, for example: // // reader := parquet.NewReader(output, schema, &parquet.ReaderConfig{ // // ... // }) type ReaderConfig struct { Schema *Schema } // DefaultReaderConfig returns a new ReaderConfig value initialized with the // default reader configuration. func DefaultReaderConfig() *ReaderConfig { return &ReaderConfig{} } // NewReaderConfig constructs a new reader configuration applying the options // passed as arguments. // // The function returns an non-nil error if some of the options carried invalid // configuration values. func NewReaderConfig(options ...ReaderOption) (*ReaderConfig, error) { config := DefaultReaderConfig() config.Apply(options...) return config, config.Validate() } // Apply applies the given list of options to c. func (c *ReaderConfig) Apply(options ...ReaderOption) { for _, opt := range options { opt.ConfigureReader(c) } } // ConfigureReader applies configuration options from c to config. func (c *ReaderConfig) ConfigureReader(config *ReaderConfig) { *config = ReaderConfig{ Schema: coalesceSchema(c.Schema, config.Schema), } } // Validate returns a non-nil error if the configuration of c is invalid. func (c *ReaderConfig) Validate() error { return nil } // The WriterConfig type carries configuration options for parquet writers. 
//
// WriterConfig implements the WriterOption interface so it can be used directly
// as argument to the NewWriter function when needed, for example:
//
//	writer := parquet.NewWriter(output, schema, &parquet.WriterConfig{
//		CreatedBy: "my test program",
//	})
type WriterConfig struct {
	CreatedBy            string
	ColumnPageBuffers    BufferPool
	ColumnIndexSizeLimit int
	PageBufferSize       int
	WriteBufferSize      int
	DataPageVersion      int
	DataPageStatistics   bool
	MaxRowsPerRowGroup   int64
	KeyValueMetadata     map[string]string
	Schema               *Schema
	BloomFilters         []BloomFilterColumn
	Compression          compress.Codec
	Sorting              SortingConfig
}

// DefaultWriterConfig returns a new WriterConfig value initialized with the
// default writer configuration.
func DefaultWriterConfig() *WriterConfig {
	return &WriterConfig{
		CreatedBy:            defaultCreatedBy(),
		ColumnPageBuffers:    &defaultColumnBufferPool,
		ColumnIndexSizeLimit: DefaultColumnIndexSizeLimit,
		PageBufferSize:       DefaultPageBufferSize,
		WriteBufferSize:      DefaultWriteBufferSize,
		DataPageVersion:      DefaultDataPageVersion,
		DataPageStatistics:   DefaultDataPageStatistics,
		MaxRowsPerRowGroup:   DefaultMaxRowsPerRowGroup,
		Sorting: SortingConfig{
			SortingBuffers: &defaultSortingBufferPool,
		},
	}
}

// NewWriterConfig constructs a new writer configuration applying the options
// passed as arguments.
//
// The function returns an non-nil error if some of the options carried invalid
// configuration values.
func NewWriterConfig(options ...WriterOption) (*WriterConfig, error) {
	config := DefaultWriterConfig()
	config.Apply(options...)
	return config, config.Validate()
}

// Apply applies the given list of options to c.
func (c *WriterConfig) Apply(options ...WriterOption) {
	for _, opt := range options {
		opt.ConfigureWriter(c)
	}
}

// ConfigureWriter applies configuration options from c to config.
func (c *WriterConfig) ConfigureWriter(config *WriterConfig) { keyValueMetadata := config.KeyValueMetadata if len(c.KeyValueMetadata) > 0 { if keyValueMetadata == nil { keyValueMetadata = make(map[string]string, len(c.KeyValueMetadata)) } for k, v := range c.KeyValueMetadata { keyValueMetadata[k] = v } } *config = WriterConfig{ CreatedBy: coalesceString(c.CreatedBy, config.CreatedBy), ColumnPageBuffers: coalesceBufferPool(c.ColumnPageBuffers, config.ColumnPageBuffers), ColumnIndexSizeLimit: coalesceInt(c.ColumnIndexSizeLimit, config.ColumnIndexSizeLimit), PageBufferSize: coalesceInt(c.PageBufferSize, config.PageBufferSize), WriteBufferSize: coalesceInt(c.WriteBufferSize, config.WriteBufferSize), DataPageVersion: coalesceInt(c.DataPageVersion, config.DataPageVersion), DataPageStatistics: config.DataPageStatistics, MaxRowsPerRowGroup: config.MaxRowsPerRowGroup, KeyValueMetadata: keyValueMetadata, Schema: coalesceSchema(c.Schema, config.Schema), BloomFilters: coalesceBloomFilters(c.BloomFilters, config.BloomFilters), Compression: coalesceCompression(c.Compression, config.Compression), Sorting: coalesceSortingConfig(c.Sorting, config.Sorting), } } // Validate returns a non-nil error if the configuration of c is invalid. func (c *WriterConfig) Validate() error { const baseName = "parquet.(*WriterConfig)." return errorInvalidConfiguration( validateNotNil(baseName+"ColumnPageBuffers", c.ColumnPageBuffers), validatePositiveInt(baseName+"ColumnIndexSizeLimit", c.ColumnIndexSizeLimit), validatePositiveInt(baseName+"PageBufferSize", c.PageBufferSize), validateOneOfInt(baseName+"DataPageVersion", c.DataPageVersion, 1, 2), c.Sorting.Validate(), ) } // The RowGroupConfig type carries configuration options for parquet row groups. 
// // RowGroupConfig implements the RowGroupOption interface so it can be used // directly as argument to the NewBuffer function when needed, for example: // // buffer := parquet.NewBuffer(&parquet.RowGroupConfig{ // ColumnBufferCapacity: 10_000, // }) type RowGroupConfig struct { ColumnBufferCapacity int Schema *Schema Sorting SortingConfig } // DefaultRowGroupConfig returns a new RowGroupConfig value initialized with the // default row group configuration. func DefaultRowGroupConfig() *RowGroupConfig { return &RowGroupConfig{ ColumnBufferCapacity: DefaultColumnBufferCapacity, Sorting: SortingConfig{ SortingBuffers: &defaultSortingBufferPool, }, } } // NewRowGroupConfig constructs a new row group configuration applying the // options passed as arguments. // // The function returns an non-nil error if some of the options carried invalid // configuration values. func NewRowGroupConfig(options ...RowGroupOption) (*RowGroupConfig, error) { config := DefaultRowGroupConfig() config.Apply(options...) return config, config.Validate() } // Validate returns a non-nil error if the configuration of c is invalid. func (c *RowGroupConfig) Validate() error { const baseName = "parquet.(*RowGroupConfig)." return errorInvalidConfiguration( validatePositiveInt(baseName+"ColumnBufferCapacity", c.ColumnBufferCapacity), c.Sorting.Validate(), ) } func (c *RowGroupConfig) Apply(options ...RowGroupOption) { for _, opt := range options { opt.ConfigureRowGroup(c) } } func (c *RowGroupConfig) ConfigureRowGroup(config *RowGroupConfig) { *config = RowGroupConfig{ ColumnBufferCapacity: coalesceInt(c.ColumnBufferCapacity, config.ColumnBufferCapacity), Schema: coalesceSchema(c.Schema, config.Schema), Sorting: coalesceSortingConfig(c.Sorting, config.Sorting), } } // The SortingConfig type carries configuration options for parquet row groups. 
// // SortingConfig implements the SortingOption interface so it can be used // directly as argument to the NewSortingWriter function when needed, // for example: // // buffer := parquet.NewSortingWriter[Row]( // parquet.SortingWriterConfig( // parquet.DropDuplicatedRows(true), // ), // }) type SortingConfig struct { SortingBuffers BufferPool SortingColumns []SortingColumn DropDuplicatedRows bool } // DefaultSortingConfig returns a new SortingConfig value initialized with the // default row group configuration. func DefaultSortingConfig() *SortingConfig { return &SortingConfig{ SortingBuffers: &defaultSortingBufferPool, } } // NewSortingConfig constructs a new sorting configuration applying the // options passed as arguments. // // The function returns an non-nil error if some of the options carried invalid // configuration values. func NewSortingConfig(options ...SortingOption) (*SortingConfig, error) { config := DefaultSortingConfig() config.Apply(options...) return config, config.Validate() } func (c *SortingConfig) Validate() error { const baseName = "parquet.(*SortingConfig)." return errorInvalidConfiguration( validateNotNil(baseName+"SortingBuffers", c.SortingBuffers), ) } func (c *SortingConfig) Apply(options ...SortingOption) { for _, opt := range options { opt.ConfigureSorting(c) } } func (c *SortingConfig) ConfigureSorting(config *SortingConfig) { *config = coalesceSortingConfig(*c, *config) } // FileOption is an interface implemented by types that carry configuration // options for parquet files. type FileOption interface { ConfigureFile(*FileConfig) } // ReaderOption is an interface implemented by types that carry configuration // options for parquet readers. type ReaderOption interface { ConfigureReader(*ReaderConfig) } // WriterOption is an interface implemented by types that carry configuration // options for parquet writers. 
type WriterOption interface {
	ConfigureWriter(*WriterConfig)
}

// RowGroupOption is an interface implemented by types that carry configuration
// options for parquet row groups.
type RowGroupOption interface {
	ConfigureRowGroup(*RowGroupConfig)
}

// SortingOption is an interface implemented by types that carry configuration
// options for parquet sorting writers.
type SortingOption interface {
	ConfigureSorting(*SortingConfig)
}

// SkipPageIndex is a file configuration option which prevents automatically
// reading the page index when opening a parquet file, when set to true. This is
// useful as an optimization when programs know that they will not need to
// consume the page index.
//
// Defaults to false.
func SkipPageIndex(skip bool) FileOption {
	return fileOption(func(cfg *FileConfig) {
		cfg.SkipPageIndex = skip
	})
}

// SkipBloomFilters is a file configuration option which prevents automatically
// reading the bloom filters when opening a parquet file, when set to true.
// This is useful as an optimization when programs know that they will not need
// to consume the bloom filters.
//
// Defaults to false.
func SkipBloomFilters(skip bool) FileOption {
	return fileOption(func(cfg *FileConfig) {
		cfg.SkipBloomFilters = skip
	})
}

// FileReadMode is a file configuration option which controls the way pages
// are read. Currently the only two options are ReadModeAsync and ReadModeSync
// which control whether or not pages are loaded asynchronously. It can be
// advantageous to use ReadModeAsync if your reader is backed by network
// storage.
//
// Defaults to ReadModeSync.
func FileReadMode(mode ReadMode) FileOption {
	return fileOption(func(cfg *FileConfig) {
		cfg.ReadMode = mode
	})
}

// ReadBufferSize is a file configuration option which controls the default
// buffer sizes for reads made to the provided io.Reader.
The default of 4096 // is appropriate for disk based access but if your reader is backed by network // storage it can be advantageous to increase this value to something more like // 4 MiB. // // Defaults to 4096. func ReadBufferSize(size int) FileOption { return fileOption(func(config *FileConfig) { config.ReadBufferSize = size }) } // FileSchema is used to pass a known schema in while opening a Parquet file. // This optimization is only useful if your application is currently opening // an extremely large number of parquet files with the same, known schema. // // Defaults to nil. func FileSchema(schema *Schema) FileOption { return fileOption(func(config *FileConfig) { config.Schema = schema }) } // PageBufferSize configures the size of column page buffers on parquet writers. // // Note that the page buffer size refers to the in-memory buffers where pages // are generated, not the size of pages after encoding and compression. // This design choice was made to help control the amount of memory needed to // read and write pages rather than controlling the space used by the encoded // representation on disk. // // Defaults to 256KiB. func PageBufferSize(size int) WriterOption { return writerOption(func(config *WriterConfig) { config.PageBufferSize = size }) } // WriteBufferSize configures the size of the write buffer. // // Setting the writer buffer size to zero deactivates buffering, all writes are // immediately sent to the output io.Writer. // // Defaults to 32KiB. func WriteBufferSize(size int) WriterOption { return writerOption(func(config *WriterConfig) { config.WriteBufferSize = size }) } // MaxRowsPerRowGroup configures the maximum number of rows that a writer will // produce in each row group. // // This limit is useful to control size of row groups in both number of rows and // byte size. 
While controlling the byte size of a row group is difficult to // achieve with parquet due to column encoding and compression, the number of // rows remains a useful proxy. // // Defaults to unlimited. func MaxRowsPerRowGroup(numRows int64) WriterOption { if numRows <= 0 { numRows = DefaultMaxRowsPerRowGroup } return writerOption(func(config *WriterConfig) { config.MaxRowsPerRowGroup = numRows }) } // CreatedBy creates a configuration option which sets the name of the // application that created a parquet file. // // The option formats the "CreatedBy" file metadata according to the convention // described by the parquet spec: // // " version (build )" // // By default, the option is set to the parquet-go module name, version, and // build hash. func CreatedBy(application, version, build string) WriterOption { createdBy := formatCreatedBy(application, version, build) return writerOption(func(config *WriterConfig) { config.CreatedBy = createdBy }) } // ColumnPageBuffers creates a configuration option to customize the buffer pool // used when constructing row groups. This can be used to provide on-disk buffers // as swap space to ensure that the parquet file creation will no be bottlenecked // on the amount of memory available. // // Defaults to using in-memory buffers. func ColumnPageBuffers(buffers BufferPool) WriterOption { return writerOption(func(config *WriterConfig) { config.ColumnPageBuffers = buffers }) } // ColumnIndexSizeLimit creates a configuration option to customize the size // limit of page boundaries recorded in column indexes. // // Defaults to 16. func ColumnIndexSizeLimit(sizeLimit int) WriterOption { return writerOption(func(config *WriterConfig) { config.ColumnIndexSizeLimit = sizeLimit }) } // DataPageVersion creates a configuration option which configures the version of // data pages used when creating a parquet file. // // Defaults to version 2. 
func DataPageVersion(version int) WriterOption {
	return writerOption(func(cfg *WriterConfig) {
		cfg.DataPageVersion = version
	})
}

// DataPageStatistics creates a configuration option which defines whether data
// page statistics are emitted. This option is useful when generating parquet
// files that intend to be backward compatible with older readers which may not
// have the ability to load page statistics from the column index.
//
// Defaults to false.
func DataPageStatistics(enabled bool) WriterOption {
	return writerOption(func(cfg *WriterConfig) {
		cfg.DataPageStatistics = enabled
	})
}

// KeyValueMetadata creates a configuration option which adds key/value metadata
// to add to the metadata of parquet files.
//
// This option is additive, it may be used multiple times to add more than one
// key/value pair.
//
// Keys are assumed to be unique, if the same key is repeated multiple times the
// last value is retained. While the parquet format does not require unique keys,
// this design decision was made to optimize for the most common use case where
// applications leverage this extension mechanism to associate single values to
// keys. This may create incompatibilities with other parquet libraries, or may
// cause some key/value pairs to be lost when open parquet files written with
// repeated keys. We can revisit this decision if it ever becomes a blocker.
func KeyValueMetadata(key, value string) WriterOption {
	return writerOption(func(cfg *WriterConfig) {
		// Lazily allocate the map the first time a pair is added.
		if cfg.KeyValueMetadata != nil {
			cfg.KeyValueMetadata[key] = value
		} else {
			cfg.KeyValueMetadata = map[string]string{key: value}
		}
	})
}

// BloomFilters creates a configuration option which defines the bloom filters
// that parquet writers should generate.
// // The compute and memory footprint of generating bloom filters for all columns // of a parquet schema can be significant, so by default no filters are created // and applications need to explicitly declare the columns that they want to // create filters for. func BloomFilters(filters ...BloomFilterColumn) WriterOption { filters = append([]BloomFilterColumn{}, filters...) return writerOption(func(config *WriterConfig) { config.BloomFilters = filters }) } // Compression creates a configuration option which sets the default compression // codec used by a writer for columns where none were defined. func Compression(codec compress.Codec) WriterOption { return writerOption(func(config *WriterConfig) { config.Compression = codec }) } // SortingWriterConfig is a writer option which applies configuration specific // to sorting writers. func SortingWriterConfig(options ...SortingOption) WriterOption { options = append([]SortingOption{}, options...) return writerOption(func(config *WriterConfig) { config.Sorting.Apply(options...) }) } // ColumnBufferCapacity creates a configuration option which defines the size of // row group column buffers. // // Defaults to 16384. func ColumnBufferCapacity(size int) RowGroupOption { return rowGroupOption(func(config *RowGroupConfig) { config.ColumnBufferCapacity = size }) } // SortingRowGroupConfig is a row group option which applies configuration // specific sorting row groups. func SortingRowGroupConfig(options ...SortingOption) RowGroupOption { options = append([]SortingOption{}, options...) return rowGroupOption(func(config *RowGroupConfig) { config.Sorting.Apply(options...) }) } // SortingColumns creates a configuration option which defines the sorting order // of columns in a row group. // // The order of sorting columns passed as argument defines the ordering // hierarchy; when elements are equal in the first column, the second column is // used to order rows, etc... 
func SortingColumns(columns ...SortingColumn) SortingOption { // Make a copy so that we do not retain the input slice generated implicitly // for the variable argument list, and also avoid having a nil slice when // the option is passed with no sorting columns, so we can differentiate it // from it not being passed. columns = append([]SortingColumn{}, columns...) return sortingOption(func(config *SortingConfig) { config.SortingColumns = columns }) } // SortingBuffers creates a configuration option which sets the pool of buffers // used to hold intermediary state when sorting parquet rows. // // Defaults to using in-memory buffers. func SortingBuffers(buffers BufferPool) SortingOption { return sortingOption(func(config *SortingConfig) { config.SortingBuffers = buffers }) } // DropDuplicatedRows configures whether a sorting writer will keep or remove // duplicated rows. // // Two rows are considered duplicates if the values of their all their sorting // columns are equal. // // Defaults to false func DropDuplicatedRows(drop bool) SortingOption { return sortingOption(func(config *SortingConfig) { config.DropDuplicatedRows = drop }) } type fileOption func(*FileConfig) func (opt fileOption) ConfigureFile(config *FileConfig) { opt(config) } type readerOption func(*ReaderConfig) func (opt readerOption) ConfigureReader(config *ReaderConfig) { opt(config) } type writerOption func(*WriterConfig) func (opt writerOption) ConfigureWriter(config *WriterConfig) { opt(config) } type rowGroupOption func(*RowGroupConfig) func (opt rowGroupOption) ConfigureRowGroup(config *RowGroupConfig) { opt(config) } type sortingOption func(*SortingConfig) func (opt sortingOption) ConfigureSorting(config *SortingConfig) { opt(config) } func coalesceInt(i1, i2 int) int { if i1 != 0 { return i1 } return i2 } func coalesceInt64(i1, i2 int64) int64 { if i1 != 0 { return i1 } return i2 } func coalesceString(s1, s2 string) string { if s1 != "" { return s1 } return s2 } func coalesceBytes(b1, b2 []byte) 
[]byte { if b1 != nil { return b1 } return b2 } func coalesceBufferPool(p1, p2 BufferPool) BufferPool { if p1 != nil { return p1 } return p2 } func coalesceSchema(s1, s2 *Schema) *Schema { if s1 != nil { return s1 } return s2 } func coalesceSortingColumns(s1, s2 []SortingColumn) []SortingColumn { if s1 != nil { return s1 } return s2 } func coalesceSortingConfig(c1, c2 SortingConfig) SortingConfig { return SortingConfig{ SortingBuffers: coalesceBufferPool(c1.SortingBuffers, c2.SortingBuffers), SortingColumns: coalesceSortingColumns(c1.SortingColumns, c2.SortingColumns), DropDuplicatedRows: c1.DropDuplicatedRows, } } func coalesceBloomFilters(f1, f2 []BloomFilterColumn) []BloomFilterColumn { if f1 != nil { return f1 } return f2 } func coalesceCompression(c1, c2 compress.Codec) compress.Codec { if c1 != nil { return c1 } return c2 } func validatePositiveInt(optionName string, optionValue int) error { if optionValue > 0 { return nil } return errorInvalidOptionValue(optionName, optionValue) } func validatePositiveInt64(optionName string, optionValue int64) error { if optionValue > 0 { return nil } return errorInvalidOptionValue(optionName, optionValue) } func validateOneOfInt(optionName string, optionValue int, supportedValues ...int) error { for _, value := range supportedValues { if value == optionValue { return nil } } return errorInvalidOptionValue(optionName, optionValue) } func validateNotNil(optionName string, optionValue interface{}) error { if optionValue != nil { return nil } return errorInvalidOptionValue(optionName, optionValue) } func errorInvalidOptionValue(optionName string, optionValue interface{}) error { return fmt.Errorf("invalid option value: %s: %v", optionName, optionValue) } func errorInvalidConfiguration(reasons ...error) error { var err *invalidConfiguration for _, reason := range reasons { if reason != nil { if err == nil { err = new(invalidConfiguration) } err.reasons = append(err.reasons, reason) } } if err != nil { return err } return nil } 
// invalidConfiguration is the error type returned by the config Validate
// methods; it carries one reason per invalid option.
type invalidConfiguration struct {
	reasons []error
}

// Error joins the reasons with newlines, trimming the trailing newline.
func (err *invalidConfiguration) Error() string {
	errorMessage := new(strings.Builder)
	for _, reason := range err.reasons {
		errorMessage.WriteString(reason.Error())
		errorMessage.WriteString("\n")
	}
	errorString := errorMessage.String()
	if errorString != "" {
		errorString = errorString[:len(errorString)-1]
	}
	return errorString
}

// Compile-time checks that the config types satisfy their option interfaces.
var (
	_ FileOption     = (*FileConfig)(nil)
	_ ReaderOption   = (*ReaderConfig)(nil)
	_ WriterOption   = (*WriterConfig)(nil)
	_ RowGroupOption = (*RowGroupConfig)(nil)
	_ SortingOption  = (*SortingConfig)(nil)
)

================================================
FILE: convert.go
================================================
package parquet

import (
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"io"
	"math"
	"math/big"
	"strconv"
	"sync"
	"time"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
)

// ConvertError is an error type returned by calls to Convert when the conversion
// of parquet schemas is impossible or the input row for the conversion is
// malformed.
type ConvertError struct {
	Path []string
	From Node
	To   Node
}

// Error satisfies the error interface.
func (e *ConvertError) Error() string {
	sourceType := e.From.Type()
	targetType := e.To.Type()

	sourceRepetition := fieldRepetitionTypeOf(e.From)
	targetRepetition := fieldRepetitionTypeOf(e.To)

	return fmt.Sprintf("cannot convert parquet column %q from %s %s to %s %s",
		columnPath(e.Path),
		sourceRepetition,
		sourceType,
		targetRepetition,
		targetType,
	)
}

// Conversion is an interface implemented by types that provide conversion of
// parquet rows from one schema to another.
//
// Conversion instances must be safe to use concurrently from multiple goroutines.
type Conversion interface {
	// Applies the conversion logic on the rows passed as argument, converting
	// them in place, and returns the number of rows converted.
	Convert(rows []Row) (int, error)

	// Converts the given column index in the target schema to the original
	// column index in the source schema of the conversion.
	Column(int) int

	// Returns the target schema of the conversion.
	Schema() *Schema
}

// conversion is the Conversion implementation constructed by Convert.
type conversion struct {
	columns []conversionColumn
	schema  *Schema
	buffers sync.Pool
	// This field is used to size the column buffers held in the sync.Pool since
	// they are intended to store the source rows being converted from.
	numberOfSourceColumns int
}

// conversionBuffer holds the scratch per-source-column value slices reused
// across Convert calls via the sync.Pool.
type conversionBuffer struct {
	columns [][]Value
}

// conversionColumn maps one target column to its source column index (or -1
// when the source has no matching column) and the value conversion to apply.
type conversionColumn struct {
	sourceIndex   int
	convertValues conversionFunc
}

type conversionFunc func([]Value) error

// convertToSelf is the no-op conversion used when values need no change.
func convertToSelf(column []Value) error { return nil }

//go:noinline
func convertToType(targetType, sourceType Type) conversionFunc {
	return func(column []Value) error {
		for i, v := range column {
			v, err := sourceType.ConvertValue(v, targetType)
			if err != nil {
				return err
			}
			// Only the data fields are copied; the repetition/definition
			// levels and column index of the original value are retained.
			column[i].ptr = v.ptr
			column[i].u64 = v.u64
			column[i].kind = v.kind
		}
		return nil
	}
}

//go:noinline
func convertToValue(value Value) conversionFunc {
	return func(column []Value) error {
		for i := range column {
			column[i] = value
		}
		return nil
	}
}

//go:noinline
func convertToZero(kind Kind) conversionFunc {
	return func(column []Value) error {
		for i := range column {
			column[i].ptr = nil
			column[i].u64 = 0
			column[i].kind = ^int8(kind)
		}
		return nil
	}
}

//go:noinline
func convertToLevels(repetitionLevels, definitionLevels []byte) conversionFunc {
	// The slices are lookup tables translating source levels into target
	// levels, built by Convert below.
	return func(column []Value) error {
		for i := range column {
			r := column[i].repetitionLevel
			d := column[i].definitionLevel
			column[i].repetitionLevel = repetitionLevels[r]
			column[i].definitionLevel = definitionLevels[d]
		}
		return nil
	}
}

//go:noinline
func multiConversionFunc(conversions []conversionFunc) conversionFunc {
	switch len(conversions) {
	case 0:
		return convertToSelf
	case 1:
		return conversions[0]
	default:
		return func(column []Value) error {
			for _, conv := range conversions {
				if err := conv(column); err != nil {
					return err
				}
			}
			return nil
		}
	}
}

// getBuffer returns a pooled conversionBuffer, allocating a fresh one sized
// for the source schema when the pool is empty.
func (c *conversion) getBuffer() *conversionBuffer {
	b, _ := c.buffers.Get().(*conversionBuffer)
	if b == nil {
		b = &conversionBuffer{
			columns: make([][]Value, c.numberOfSourceColumns),
		}
		values := make([]Value, c.numberOfSourceColumns)
		for i := range b.columns {
			// Three-index slices carve non-overlapping zero-length,
			// capacity-one windows out of a single allocation; appends
			// beyond one value reallocate per column.
			b.columns[i] = values[i : i : i+1]
		}
	}
	return b
}

func (c *conversion) putBuffer(b *conversionBuffer) {
	c.buffers.Put(b)
}

// Convert here satisfies the Conversion interface, and does the actual work
// to convert between the source and target Rows.
func (c *conversion) Convert(rows []Row) (int, error) {
	source := c.getBuffer()
	defer c.putBuffer(source)

	for n, row := range rows {
		// Reset the scratch columns, keeping their capacity, then scatter
		// the row's values into per-source-column groups.
		for i, values := range source.columns {
			source.columns[i] = values[:0]
		}
		row.Range(func(columnIndex int, columnValues []Value) bool {
			source.columns[columnIndex] = append(source.columns[columnIndex], columnValues...)
			return true
		})
		// Rebuild the row in place, reusing its backing array.
		row = row[:0]

		for columnIndex, conv := range c.columns {
			columnOffset := len(row)
			if conv.sourceIndex < 0 {
				// When there is no source column, we put a single value as
				// placeholder in the column. This is the condition where the
				// target contained a column which did not exist in the
				// source and had no other columns existing at that same
				// level.
				row = append(row, Value{})
			} else {
				// We must copy to the output row first and not mutate the
				// source columns because multiple target columns may map to
				// the same source column.
				row = append(row, source.columns[conv.sourceIndex]...)
			}
			columnValues := row[columnOffset:]
			if err := conv.convertValues(columnValues); err != nil {
				return n, err
			}
			// Since the column index may have changed between the source and
			// target columns we ensure that the right value is always written
			// to the output row.
			for i := range columnValues {
				columnValues[i].columnIndex = ^int16(columnIndex)
			}
		}

		rows[n] = row
	}

	return len(rows), nil
}

func (c *conversion) Column(i int) int { return c.columns[i].sourceIndex }

func (c *conversion) Schema() *Schema { return c.schema }

// identity is the Conversion returned when the source and target schemas are
// equal; it leaves rows untouched.
type identity struct{ schema *Schema }

func (id identity) Convert(rows []Row) (int, error) { return len(rows), nil }
func (id identity) Column(i int) int                { return i }
func (id identity) Schema() *Schema                 { return id.schema }

// Convert constructs a conversion function from one parquet schema to another.
//
// The function supports converting between schemas where the source or target
// have extra columns; if there are more columns in the source, they will be
// stripped out of the rows. Extra columns in the target schema will be set to
// null or zero values.
//
// The returned function is intended to be used to append the converted source
// row to the destination buffer.
func Convert(to, from Node) (conv Conversion, err error) {
	schema, _ := to.(*Schema)
	if schema == nil {
		schema = NewSchema("", to)
	}

	if nodesAreEqual(to, from) {
		return identity{schema}, nil
	}

	targetMapping, targetColumns := columnMappingOf(to)
	sourceMapping, sourceColumns := columnMappingOf(from)
	columns := make([]conversionColumn, len(targetColumns))

	for i, path := range targetColumns {
		targetColumn := targetMapping.lookup(path)
		sourceColumn := sourceMapping.lookup(path)

		conversions := []conversionFunc{}
		if sourceColumn.node != nil {
			// The column exists in both schemas: convert the physical type
			// if needed, then remap repetition/definition levels.
			targetType := targetColumn.node.Type()
			sourceType := sourceColumn.node.Type()
			if !typesAreEqual(targetType, sourceType) {
				conversions = append(conversions,
					convertToType(targetType, sourceType),
				)
			}

			// Walk the column path in both schemas in lockstep, recording
			// the target level reached for each source level so that the
			// lookup tables translate source levels into target levels.
			repetitionLevels := make([]byte, len(path)+1)
			definitionLevels := make([]byte, len(path)+1)
			targetRepetitionLevel := byte(0)
			targetDefinitionLevel := byte(0)
			sourceRepetitionLevel := byte(0)
			sourceDefinitionLevel := byte(0)
			targetNode := to
			sourceNode := from

			for j := 0; j < len(path); j++ {
				targetNode = fieldByName(targetNode, path[j])
				sourceNode = fieldByName(sourceNode, path[j])

				targetRepetitionLevel, targetDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(targetNode), targetRepetitionLevel, targetDefinitionLevel,
				)
				sourceRepetitionLevel, sourceDefinitionLevel = applyFieldRepetitionType(
					fieldRepetitionTypeOf(sourceNode), sourceRepetitionLevel, sourceDefinitionLevel,
				)

				repetitionLevels[sourceRepetitionLevel] = targetRepetitionLevel
				definitionLevels[sourceDefinitionLevel] = targetDefinitionLevel
			}

			repetitionLevels = repetitionLevels[:sourceRepetitionLevel+1]
			definitionLevels = definitionLevels[:sourceDefinitionLevel+1]

			// Skip the level remapping when both tables are identity maps.
			if !isDirectLevelMapping(repetitionLevels) || !isDirectLevelMapping(definitionLevels) {
				conversions = append(conversions,
					convertToLevels(repetitionLevels, definitionLevels),
				)
			}

		} else {
			// The column only exists in the target schema: fill it with
			// zero/null values, anchored to the closest source column when
			// one exists so levels carry over.
			targetType := targetColumn.node.Type()
			targetKind := targetType.Kind()
			sourceColumn = sourceMapping.lookupClosest(path)
			if sourceColumn.node != nil {
				conversions = append(conversions,
					convertToZero(targetKind),
				)
			} else {
				conversions = append(conversions,
					convertToValue(ZeroValue(targetKind)),
				)
			}
		}

		columns[i] = conversionColumn{
			sourceIndex:   int(sourceColumn.columnIndex),
			convertValues: multiConversionFunc(conversions),
		}
	}

	c := &conversion{
		columns:               columns,
		schema:                schema,
		numberOfSourceColumns: len(sourceColumns),
	}
	return c, nil
}

// isDirectLevelMapping reports whether levels is an identity lookup table
// (levels[i] == i for all i).
func isDirectLevelMapping(levels []byte) bool {
	for i, level := range levels {
		if level != byte(i) {
			return false
		}
	}
	return true
}

// ConvertRowGroup constructs a wrapper of the given row group which applies
// the given schema conversion to its rows.
func ConvertRowGroup(rowGroup RowGroup, conv Conversion) RowGroup {
	schema := conv.Schema()
	numRows := rowGroup.NumRows()
	rowGroupColumns := rowGroup.ColumnChunks()

	// Build the column chunk list of the converted row group: target columns
	// with no source mapping are backed by a synthetic all-null chunk.
	columns := make([]ColumnChunk, numLeafColumnsOf(schema))
	forEachLeafColumnOf(schema, func(leaf leafColumn) {
		i := leaf.columnIndex
		j := conv.Column(int(leaf.columnIndex))
		if j < 0 {
			columns[i] = &missingColumnChunk{
				typ:    leaf.node.Type(),
				column: i,
				// TODO: we assume the number of values is the same as the
				// number of rows, which may not be accurate when the column is
				// part of a repeated group; neighbor columns may be repeated in
				// which case it would be impossible for this chunk not to be.
				numRows:   numRows,
				numValues: numRows,
				numNulls:  numRows,
			}
		} else {
			columns[i] = rowGroupColumns[j]
		}
	})

	// Sorting columns must exist on the conversion schema in order to be
	// advertised on the converted row group otherwise the resulting rows
	// would not be in the right order.
	sorting := []SortingColumn{}
	for _, col := range rowGroup.SortingColumns() {
		if !hasColumnPath(schema, col.Path()) {
			// Stop at the first missing column: sort order is only meaningful
			// as an uninterrupted prefix of the original sorting columns.
			break
		}
		sorting = append(sorting, col)
	}

	return &convertedRowGroup{
		// The pair of rowGroup+conv is retained to construct a converted row
		// reader by wrapping the underlying row reader of the row group because
		// it allows proper reconstruction of the repetition and definition
		// levels.
		//
		// TODO: can we figure out how to set the repetition and definition
		// levels when reading values from missing column pages? At first sight
		// it appears complex to do, however:
		//
		// * It is possible that having these levels when reading values of
		//   missing column pages is not necessary in some scenarios (e.g. when
		//   merging row groups).
		//
		// * We may be able to assume the repetition and definition levels at
		//   the call site (e.g. in the functions reading rows from columns).
		//
		// Columns of the source row group which do not exist in the target are
		// masked to prevent loading unneeded pages when reading rows from the
		// converted row group.
		rowGroup: maskMissingRowGroupColumns(rowGroup, len(columns), conv),
		columns:  columns,
		sorting:  sorting,
		conv:     conv,
	}
}

// maskMissingRowGroupColumns returns a view of r where every column chunk is
// replaced by an all-null placeholder, except the source columns that the
// conversion actually reads (those are kept as-is so their pages are loaded).
func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Conversion) RowGroup {
	rowGroupColumns := r.ColumnChunks()
	columns := make([]ColumnChunk, len(rowGroupColumns))
	missing := make([]missingColumnChunk, len(columns))
	numRows := r.NumRows()

	// Start with every column masked by a placeholder chunk.
	for i := range missing {
		missing[i] = missingColumnChunk{
			typ:       rowGroupColumns[i].Type(),
			column:    int16(i),
			numRows:   numRows,
			numValues: numRows,
			numNulls:  numRows,
		}
	}

	for i := range columns {
		columns[i] = &missing[i]
	}

	// Unmask the source columns referenced by the conversion.
	for i := 0; i < numColumns; i++ {
		j := conv.Column(i)
		if j >= 0 && j < len(columns) {
			columns[j] = rowGroupColumns[j]
		}
	}

	return &rowGroup{
		schema:  r.Schema(),
		numRows: numRows,
		columns: columns,
	}
}

// missingColumnChunk is a synthetic column chunk standing in for a column that
// does not exist in the source row group; every value it produces is null.
type missingColumnChunk struct {
	typ       Type
	column    int16
	numRows   int64
	numValues int64
	numNulls  int64
}

func (c *missingColumnChunk) Type() Type               { return c.typ }
func (c *missingColumnChunk) Column() int              { return int(c.column) }
func (c *missingColumnChunk) Pages() Pages             { return onePage(missingPage{c}) }
func (c *missingColumnChunk) ColumnIndex() ColumnIndex { return missingColumnIndex{c} }
func (c *missingColumnChunk) OffsetIndex() OffsetIndex { return missingOffsetIndex{} }
func (c *missingColumnChunk) BloomFilter() BloomFilter { return missingBloomFilter{} }
func (c *missingColumnChunk) NumValues() int64         { return 0 }

// missingColumnIndex advertises a single all-null page with empty min/max
// bounds for a missing column chunk.
type missingColumnIndex struct{ *missingColumnChunk }

func (i missingColumnIndex) NumPages() int       { return 1 }
func (i missingColumnIndex) NullCount(int) int64 { return i.numNulls }
func (i missingColumnIndex) NullPage(int) bool   { return true }
func (i missingColumnIndex) MinValue(int) Value  { return Value{} }
func (i missingColumnIndex) MaxValue(int) Value  { return Value{} }
func (i missingColumnIndex)
IsAscending() bool { return true }
func (i missingColumnIndex) IsDescending() bool { return false }

// missingOffsetIndex describes the single synthetic page of a missing column:
// one page, zero bytes, starting at the first row.
type missingOffsetIndex struct{}

func (missingOffsetIndex) NumPages() int                { return 1 }
func (missingOffsetIndex) Offset(int) int64             { return 0 }
func (missingOffsetIndex) CompressedPageSize(int) int64 { return 0 }
func (missingOffsetIndex) FirstRowIndex(int) int64      { return 0 }

// missingBloomFilter is an empty bloom filter; lookups always miss.
type missingBloomFilter struct{}

func (missingBloomFilter) ReadAt([]byte, int64) (int, error) { return 0, io.EOF }
func (missingBloomFilter) Size() int64                       { return 0 }
func (missingBloomFilter) Check(Value) (bool, error)         { return false, nil }

// missingPage is the one page of a missingColumnChunk; it has no data and
// every value read from it is null.
type missingPage struct{ *missingColumnChunk }

func (p missingPage) Column() int                   { return int(p.column) }
func (p missingPage) Dictionary() Dictionary        { return nil }
func (p missingPage) NumRows() int64                { return p.numRows }
func (p missingPage) NumValues() int64              { return p.numValues }
func (p missingPage) NumNulls() int64               { return p.numNulls }
func (p missingPage) Bounds() (min, max Value, ok bool) { return }
func (p missingPage) Slice(i, j int64) Page         { return p }
func (p missingPage) Size() int64                   { return 0 }
func (p missingPage) RepetitionLevels() []byte      { return nil }
func (p missingPage) DefinitionLevels() []byte      { return nil }
func (p missingPage) Data() encoding.Values         { return p.typ.NewValues(nil, nil) }
func (p missingPage) Values() ValueReader           { return &missingPageValues{page: p} }

// missingPageValues reads null values from a missingPage, tracking how many
// have been produced so far.
type missingPageValues struct {
	page missingPage
	read int64
}

func (r *missingPageValues) ReadValues(values []Value) (int, error) {
	// Never produce more values than the page holds.
	if remain := r.page.numValues - r.read; int64(len(values)) > remain {
		values = values[:remain]
	}
	// TODO: how do we set the repetition and definition levels here?
	null := Value{columnIndex: ^r.page.column}
	for i := range values {
		values[i] = null
	}
	count := len(values)
	r.read += int64(count)
	if r.read == r.page.numValues {
		return count, io.EOF
	}
	return count, nil
}

func (r *missingPageValues) Close() error {
	// Mark the page as fully consumed so subsequent reads return io.EOF.
	r.read = r.page.numValues
	return nil
}

// convertedRowGroup wraps a source row group and presents it under the target
// schema of a Conversion.
type convertedRowGroup struct {
	rowGroup RowGroup
	columns  []ColumnChunk
	sorting  []SortingColumn
	conv     Conversion
}

func (c *convertedRowGroup) NumRows() int64                  { return c.rowGroup.NumRows() }
func (c *convertedRowGroup) ColumnChunks() []ColumnChunk     { return c.columns }
func (c *convertedRowGroup) Schema() *Schema                 { return c.conv.Schema() }
func (c *convertedRowGroup) SortingColumns() []SortingColumn { return c.sorting }

func (c *convertedRowGroup) Rows() Rows {
	src := c.rowGroup.Rows()
	return &convertedRows{
		Closer: src,
		rows:   src,
		conv:   c.conv,
	}
}

// ConvertRowReader constructs a wrapper of the given row reader which applies
// the given schema conversion to the rows.
func ConvertRowReader(rows RowReader, conv Conversion) RowReaderWithSchema {
	return &convertedRows{rows: &forwardRowSeeker{rows: rows}, conv: conv}
}

// convertedRows applies a Conversion to every batch of rows read from the
// underlying reader.
type convertedRows struct {
	io.Closer
	rows RowReadSeeker
	conv Conversion
}

func (c *convertedRows) ReadRows(rows []Row) (int, error) {
	n, err := c.rows.ReadRows(rows)
	if n == 0 {
		return n, err
	}
	// A conversion error takes precedence over the read error (typically
	// io.EOF) so callers see why the rows could not be produced.
	converted, convErr := c.conv.Convert(rows[:n])
	if convErr != nil {
		err = convErr
	}
	return converted, err
}

func (c *convertedRows) Schema() *Schema { return c.conv.Schema() }

func (c *convertedRows) SeekToRow(rowIndex int64) error {
	return c.rows.SeekToRow(rowIndex)
}

var (
	trueBytes  = []byte(`true`)
	falseBytes = []byte(`false`)
	unixEpoch  = time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)
)

// Boolean source conversions: a boolean converts to 1/0 in numeric targets.
func convertBooleanToInt32(v Value) (Value, error) {
	return v.convertToInt32(int32(v.byte())), nil
}

func convertBooleanToInt64(v Value) (Value, error) {
	return v.convertToInt64(int64(v.byte())), nil
}

func convertBooleanToInt96(v Value) (Value, error) {
	return v.convertToInt96(deprecated.Int96{0: uint32(v.byte())}), nil
}

func
convertBooleanToFloat(v Value) (Value, error) { return v.convertToFloat(float32(v.byte())), nil } func convertBooleanToDouble(v Value) (Value, error) { return v.convertToDouble(float64(v.byte())), nil } func convertBooleanToByteArray(v Value) (Value, error) { return v.convertToByteArray([]byte{v.byte()}), nil } func convertBooleanToFixedLenByteArray(v Value, size int) (Value, error) { b := []byte{v.byte()} c := make([]byte, size) copy(c, b) return v.convertToFixedLenByteArray(c), nil } func convertBooleanToString(v Value) (Value, error) { b := ([]byte)(nil) if v.boolean() { b = trueBytes } else { b = falseBytes } return v.convertToByteArray(b), nil } func convertInt32ToBoolean(v Value) (Value, error) { return v.convertToBoolean(v.int32() != 0), nil } func convertInt32ToInt64(v Value) (Value, error) { return v.convertToInt64(int64(v.int32())), nil } func convertInt32ToInt96(v Value) (Value, error) { return v.convertToInt96(deprecated.Int32ToInt96(v.int32())), nil } func convertInt32ToFloat(v Value) (Value, error) { return v.convertToFloat(float32(v.int32())), nil } func convertInt32ToDouble(v Value) (Value, error) { return v.convertToDouble(float64(v.int32())), nil } func convertInt32ToByteArray(v Value) (Value, error) { b := make([]byte, 4) binary.LittleEndian.PutUint32(b, v.uint32()) return v.convertToByteArray(b), nil } func convertInt32ToFixedLenByteArray(v Value, size int) (Value, error) { b := make([]byte, 4) c := make([]byte, size) binary.LittleEndian.PutUint32(b, v.uint32()) copy(c, b) return v.convertToFixedLenByteArray(c), nil } func convertInt32ToString(v Value) (Value, error) { return v.convertToByteArray(strconv.AppendInt(nil, int64(v.int32()), 10)), nil } func convertInt64ToBoolean(v Value) (Value, error) { return v.convertToBoolean(v.int64() != 0), nil } func convertInt64ToInt32(v Value) (Value, error) { return v.convertToInt32(int32(v.int64())), nil } func convertInt64ToInt96(v Value) (Value, error) { return 
v.convertToInt96(deprecated.Int64ToInt96(v.int64())), nil } func convertInt64ToFloat(v Value) (Value, error) { return v.convertToFloat(float32(v.int64())), nil } func convertInt64ToDouble(v Value) (Value, error) { return v.convertToDouble(float64(v.int64())), nil } func convertInt64ToByteArray(v Value) (Value, error) { b := make([]byte, 8) binary.LittleEndian.PutUint64(b, v.uint64()) return v.convertToByteArray(b), nil } func convertInt64ToFixedLenByteArray(v Value, size int) (Value, error) { b := make([]byte, 8) c := make([]byte, size) binary.LittleEndian.PutUint64(b, v.uint64()) copy(c, b) return v.convertToFixedLenByteArray(c), nil } func convertInt64ToString(v Value) (Value, error) { return v.convertToByteArray(strconv.AppendInt(nil, v.int64(), 10)), nil } func convertInt96ToBoolean(v Value) (Value, error) { return v.convertToBoolean(!v.int96().IsZero()), nil } func convertInt96ToInt32(v Value) (Value, error) { return v.convertToInt32(v.int96().Int32()), nil } func convertInt96ToInt64(v Value) (Value, error) { return v.convertToInt64(v.int96().Int64()), nil } func convertInt96ToFloat(v Value) (Value, error) { return v, invalidConversion(v, "INT96", "FLOAT") } func convertInt96ToDouble(v Value) (Value, error) { return v, invalidConversion(v, "INT96", "DOUBLE") } func convertInt96ToByteArray(v Value) (Value, error) { return v.convertToByteArray(v.byteArray()), nil } func convertInt96ToFixedLenByteArray(v Value, size int) (Value, error) { b := v.byteArray() if len(b) < size { c := make([]byte, size) copy(c, b) b = c } else { b = b[:size] } return v.convertToFixedLenByteArray(b), nil } func convertInt96ToString(v Value) (Value, error) { return v.convertToByteArray([]byte(v.String())), nil } func convertFloatToBoolean(v Value) (Value, error) { return v.convertToBoolean(v.float() != 0), nil } func convertFloatToInt32(v Value) (Value, error) { return v.convertToInt32(int32(v.float())), nil } func convertFloatToInt64(v Value) (Value, error) { return 
v.convertToInt64(int64(v.float())), nil } func convertFloatToInt96(v Value) (Value, error) { return v, invalidConversion(v, "FLOAT", "INT96") } func convertFloatToDouble(v Value) (Value, error) { return v.convertToDouble(float64(v.float())), nil } func convertFloatToByteArray(v Value) (Value, error) { b := make([]byte, 4) binary.LittleEndian.PutUint32(b, v.uint32()) return v.convertToByteArray(b), nil } func convertFloatToFixedLenByteArray(v Value, size int) (Value, error) { b := make([]byte, 4) c := make([]byte, size) binary.LittleEndian.PutUint32(b, v.uint32()) copy(c, b) return v.convertToFixedLenByteArray(c), nil } func convertFloatToString(v Value) (Value, error) { return v.convertToByteArray(strconv.AppendFloat(nil, float64(v.float()), 'g', -1, 32)), nil } func convertDoubleToBoolean(v Value) (Value, error) { return v.convertToBoolean(v.double() != 0), nil } func convertDoubleToInt32(v Value) (Value, error) { return v.convertToInt32(int32(v.double())), nil } func convertDoubleToInt64(v Value) (Value, error) { return v.convertToInt64(int64(v.double())), nil } func convertDoubleToInt96(v Value) (Value, error) { return v, invalidConversion(v, "FLOAT", "INT96") } func convertDoubleToFloat(v Value) (Value, error) { return v.convertToFloat(float32(v.double())), nil } func convertDoubleToByteArray(v Value) (Value, error) { b := make([]byte, 8) binary.LittleEndian.PutUint64(b, v.uint64()) return v.convertToByteArray(b), nil } func convertDoubleToFixedLenByteArray(v Value, size int) (Value, error) { b := make([]byte, 8) c := make([]byte, size) binary.LittleEndian.PutUint64(b, v.uint64()) copy(c, b) return v.convertToFixedLenByteArray(c), nil } func convertDoubleToString(v Value) (Value, error) { return v.convertToByteArray(strconv.AppendFloat(nil, v.double(), 'g', -1, 64)), nil } func convertByteArrayToBoolean(v Value) (Value, error) { return v.convertToBoolean(!isZero(v.byteArray())), nil } func convertByteArrayToInt32(v Value) (Value, error) { b := make([]byte, 4) 
copy(b, v.byteArray()) return v.convertToInt32(int32(binary.LittleEndian.Uint32(b))), nil } func convertByteArrayToInt64(v Value) (Value, error) { b := make([]byte, 8) copy(b, v.byteArray()) return v.convertToInt64(int64(binary.LittleEndian.Uint64(b))), nil } func convertByteArrayToInt96(v Value) (Value, error) { b := make([]byte, 12) copy(b, v.byteArray()) return v.convertToInt96(deprecated.Int96{ 0: binary.LittleEndian.Uint32(b[0:4]), 1: binary.LittleEndian.Uint32(b[4:8]), 2: binary.LittleEndian.Uint32(b[8:12]), }), nil } func convertByteArrayToFloat(v Value) (Value, error) { b := make([]byte, 4) copy(b, v.byteArray()) return v.convertToFloat(math.Float32frombits(binary.LittleEndian.Uint32(b))), nil } func convertByteArrayToDouble(v Value) (Value, error) { b := make([]byte, 8) copy(b, v.byteArray()) return v.convertToDouble(math.Float64frombits(binary.LittleEndian.Uint64(b))), nil } func convertByteArrayToFixedLenByteArray(v Value, size int) (Value, error) { b := v.byteArray() if len(b) < size { c := make([]byte, size) copy(c, b) b = c } else { b = b[:size] } return v.convertToFixedLenByteArray(b), nil } func convertFixedLenByteArrayToString(v Value) (Value, error) { b := v.byteArray() c := make([]byte, hex.EncodedLen(len(b))) hex.Encode(c, b) return v.convertToByteArray(c), nil } func convertStringToBoolean(v Value) (Value, error) { b, err := strconv.ParseBool(v.string()) if err != nil { return v, conversionError(v, "STRING", "BOOLEAN", err) } return v.convertToBoolean(b), nil } func convertStringToInt32(v Value) (Value, error) { i, err := strconv.ParseInt(v.string(), 10, 32) if err != nil { return v, conversionError(v, "STRING", "INT32", err) } return v.convertToInt32(int32(i)), nil } func convertStringToInt64(v Value) (Value, error) { i, err := strconv.ParseInt(v.string(), 10, 64) if err != nil { return v, conversionError(v, "STRING", "INT64", err) } return v.convertToInt64(i), nil } func convertStringToInt96(v Value) (Value, error) { i, ok := 
new(big.Int).SetString(v.string(), 10) if !ok { return v, conversionError(v, "STRING", "INT96", strconv.ErrSyntax) } b := i.Bytes() c := make([]byte, 12) copy(c, b) i96 := deprecated.BytesToInt96(c) return v.convertToInt96(i96[0]), nil } func convertStringToFloat(v Value) (Value, error) { f, err := strconv.ParseFloat(v.string(), 32) if err != nil { return v, conversionError(v, "STRING", "FLOAT", err) } return v.convertToFloat(float32(f)), nil } func convertStringToDouble(v Value) (Value, error) { f, err := strconv.ParseFloat(v.string(), 64) if err != nil { return v, conversionError(v, "STRING", "DOUBLE", err) } return v.convertToDouble(f), nil } func convertStringToFixedLenByteArray(v Value, size int) (Value, error) { b := v.byteArray() c := make([]byte, size) _, err := hex.Decode(c, b) if err != nil { return v, conversionError(v, "STRING", "BYTE_ARRAY", err) } return v.convertToFixedLenByteArray(c), nil } func convertStringToDate(v Value, tz *time.Location) (Value, error) { t, err := time.ParseInLocation("2006-01-02", v.string(), tz) if err != nil { return v, conversionError(v, "STRING", "DATE", err) } d := daysSinceUnixEpoch(t) return v.convertToInt32(int32(d)), nil } func convertStringToTimeMillis(v Value, tz *time.Location) (Value, error) { t, err := time.ParseInLocation("15:04:05.999", v.string(), tz) if err != nil { return v, conversionError(v, "STRING", "TIME", err) } m := nearestMidnightLessThan(t) milliseconds := t.Sub(m).Milliseconds() return v.convertToInt32(int32(milliseconds)), nil } func convertStringToTimeMicros(v Value, tz *time.Location) (Value, error) { t, err := time.ParseInLocation("15:04:05.999999", v.string(), tz) if err != nil { return v, conversionError(v, "STRING", "TIME", err) } m := nearestMidnightLessThan(t) microseconds := t.Sub(m).Microseconds() return v.convertToInt64(microseconds), nil } func convertDateToTimestamp(v Value, u format.TimeUnit, tz *time.Location) (Value, error) { t := unixEpoch.AddDate(0, 0, int(v.int32())) d := 
timeUnitDuration(u) return v.convertToInt64(int64(t.In(tz).Sub(unixEpoch) / d)), nil } func convertDateToString(v Value) (Value, error) { t := unixEpoch.AddDate(0, 0, int(v.int32())) b := t.AppendFormat(make([]byte, 0, 10), "2006-01-02") return v.convertToByteArray(b), nil } func convertTimeMillisToString(v Value, tz *time.Location) (Value, error) { t := time.UnixMilli(int64(v.int32())).In(tz) b := t.AppendFormat(make([]byte, 0, 12), "15:04:05.999") return v.convertToByteArray(b), nil } func convertTimeMicrosToString(v Value, tz *time.Location) (Value, error) { t := time.UnixMicro(v.int64()).In(tz) b := t.AppendFormat(make([]byte, 0, 15), "15:04:05.999999") return v.convertToByteArray(b), nil } func convertTimestampToDate(v Value, u format.TimeUnit, tz *time.Location) (Value, error) { t := timestamp(v, u, tz) d := daysSinceUnixEpoch(t) return v.convertToInt32(int32(d)), nil } func convertTimestampToTimeMillis(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) { t := timestamp(v, u, sourceZone) m := nearestMidnightLessThan(t) milliseconds := t.In(targetZone).Sub(m).Milliseconds() return v.convertToInt32(int32(milliseconds)), nil } func convertTimestampToTimeMicros(v Value, u format.TimeUnit, sourceZone, targetZone *time.Location) (Value, error) { t := timestamp(v, u, sourceZone) m := nearestMidnightLessThan(t) microseconds := t.In(targetZone).Sub(m).Microseconds() return v.convertToInt64(int64(microseconds)), nil } func convertTimestampToTimestamp(v Value, sourceUnit, targetUnit format.TimeUnit) (Value, error) { sourceScale := timeUnitDuration(sourceUnit).Nanoseconds() targetScale := timeUnitDuration(targetUnit).Nanoseconds() targetValue := (v.int64() * sourceScale) / targetScale return v.convertToInt64(targetValue), nil } const nanosecondsPerDay = 24 * 60 * 60 * 1e9 func daysSinceUnixEpoch(t time.Time) int { return int(t.Sub(unixEpoch).Hours()) / 24 } func nearestMidnightLessThan(t time.Time) time.Time { y, m, d := t.Date() return 
time.Date(y, m, d, 0, 0, 0, 0, t.Location()) } func timestamp(v Value, u format.TimeUnit, tz *time.Location) time.Time { return unixEpoch.In(tz).Add(time.Duration(v.int64()) * timeUnitDuration(u)) } func timeUnitDuration(unit format.TimeUnit) time.Duration { switch { case unit.Millis != nil: return time.Millisecond case unit.Micros != nil: return time.Microsecond default: return time.Nanosecond } } func invalidConversion(value Value, from, to string) error { return fmt.Errorf("%s to %s: %s: %w", from, to, value, ErrInvalidConversion) } func conversionError(value Value, from, to string, err error) error { return fmt.Errorf("%s to %s: %q: %s: %w", from, to, value.string(), err, ErrInvalidConversion) } ================================================ FILE: convert_test.go ================================================ package parquet_test import ( "reflect" "testing" "time" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/deprecated" ) type AddressBook1 struct { Owner string `parquet:"owner,zstd"` OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"` } type AddressBook2 struct { Owner string `parquet:"owner,zstd"` OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"` Contacts []Contact `parquet:"contacts"` Extra string `parquet:"extra"` } type AddressBook3 struct { Owner string `parquet:"owner,zstd"` Contacts []Contact2 `parquet:"contacts"` } type Contact2 struct { Name string `parquet:"name"` PhoneNumbers []string `parquet:"phoneNumbers,zstd"` Addresses []string `parquet:"addresses,zstd"` } type AddressBook4 struct { Owner string `parquet:"owner,zstd"` Contacts []Contact2 `parquet:"contacts"` Extra string `parquet:"extra"` } type SimpleNumber struct { Number *int64 `parquet:"number,optional"` } type SimpleContact struct { Numbers []SimpleNumber `parquet:"numbers"` } type SimpleAddressBook struct { Name string Contact SimpleContact } type SimpleAddressBook2 struct { Name string Contact SimpleContact Extra string } type ListOfIDs 
struct { IDs []uint64 }

// conversionTests is a table of schema pairs exercised by TestConvert: each
// entry deconstructs `from`, converts its rows to the schema of `to`, and
// expects reconstruction to produce exactly `to`.
var conversionTests = [...]struct {
	scenario string
	from     interface{}
	to       interface{}
}{
	{
		scenario: "convert between rows which have the same schema",
		from: AddressBook{
			Owner: "Julien Le Dem",
			OwnerPhoneNumbers: []string{
				"555 123 4567",
				"555 666 1337",
			},
			Contacts: []Contact{
				{
					Name:        "Dmitriy Ryaboy",
					PhoneNumber: "555 987 6543",
				},
				{
					Name: "Chris Aniszczyk",
				},
			},
		},
		to: AddressBook{
			Owner: "Julien Le Dem",
			OwnerPhoneNumbers: []string{
				"555 123 4567",
				"555 666 1337",
			},
			Contacts: []Contact{
				{
					Name:        "Dmitriy Ryaboy",
					PhoneNumber: "555 987 6543",
				},
				{
					Name: "Chris Aniszczyk",
				},
			},
		},
	},

	{
		scenario: "missing column",
		from:     struct{ FirstName, LastName string }{FirstName: "Luke", LastName: "Skywalker"},
		to:       struct{ LastName string }{LastName: "Skywalker"},
	},

	{
		scenario: "missing optional column",
		from: struct {
			FirstName *string
			LastName  string
		}{FirstName: newString("Luke"), LastName: "Skywalker"},
		to: struct{ LastName string }{LastName: "Skywalker"},
	},

	{
		scenario: "missing repeated column",
		from: struct {
			ID    uint64
			Names []string
		}{ID: 42, Names: []string{"me", "myself", "I"}},
		to: struct{ ID uint64 }{ID: 42},
	},

	{
		scenario: "extra column",
		from:     struct{ LastName string }{LastName: "Skywalker"},
		to:       struct{ FirstName, LastName string }{LastName: "Skywalker"},
	},

	{
		scenario: "extra optional column",
		from:     struct{ ID uint64 }{ID: 2},
		to: struct {
			ID      uint64
			Details *struct{ FirstName, LastName string }
		}{ID: 2, Details: nil},
	},

	{
		scenario: "extra repeated column",
		from:     struct{ ID uint64 }{ID: 1},
		to: struct {
			ID    uint64
			Names []string
		}{ID: 1, Names: []string{}},
	},

	{
		scenario: "extra required column from repeated",
		from: struct{ ListOfIDs ListOfIDs }{
			ListOfIDs: ListOfIDs{IDs: []uint64{0, 1, 2}},
		},
		to: struct {
			MainID    uint64
			ListOfIDs ListOfIDs
		}{
			ListOfIDs: ListOfIDs{IDs: []uint64{0, 1, 2}},
		},
	},

	{
		scenario: "extra fields in repeated group",
		from: struct{ Books []AddressBook1 }{
			Books: []AddressBook1{
				{
					Owner:             "me",
					OwnerPhoneNumbers: []string{"123-456-7890", "321-654-0987"},
				},
				{
					Owner:             "you",
					OwnerPhoneNumbers: []string{"000-000-0000"},
				},
			},
		},
		to: struct{ Books []AddressBook2 }{
			Books: []AddressBook2{
				{
					Owner:             "me",
					OwnerPhoneNumbers: []string{"123-456-7890", "321-654-0987"},
					Contacts:          []Contact{},
				},
				{
					Owner:             "you",
					OwnerPhoneNumbers: []string{"000-000-0000"},
					Contacts:          []Contact{},
				},
			},
		},
	},

	{
		scenario: "extra column on complex struct",
		from: AddressBook{
			Owner:             "Julien Le Dem",
			OwnerPhoneNumbers: []string{},
			Contacts: []Contact{
				{
					Name:        "Dmitriy Ryaboy",
					PhoneNumber: "555 987 6543",
				},
				{
					Name: "Chris Aniszczyk",
				},
			},
		},
		to: AddressBook2{
			Owner:             "Julien Le Dem",
			OwnerPhoneNumbers: []string{},
			Contacts: []Contact{
				{
					Name:        "Dmitriy Ryaboy",
					PhoneNumber: "555 987 6543",
				},
				{
					Name: "Chris Aniszczyk",
				},
			},
		},
	},

	{
		scenario: "required to optional leaf",
		from:     struct{ Name string }{Name: "Luke"},
		to:       struct{ Name *string }{Name: newString("Luke")},
	},

	{
		scenario: "required to repeated leaf",
		from:     struct{ Name string }{Name: "Luke"},
		to:       struct{ Name []string }{Name: []string{"Luke"}},
	},

	{
		scenario: "optional to required leaf",
		from:     struct{ Name *string }{Name: newString("Luke")},
		to:       struct{ Name string }{Name: "Luke"},
	},

	{
		scenario: "optional to repeated leaf",
		from:     struct{ Name *string }{Name: newString("Luke")},
		to:       struct{ Name []string }{Name: []string{"Luke"}},
	},

	{
		scenario: "optional to repeated leaf (null)",
		from:     struct{ Name *string }{Name: nil},
		to:       struct{ Name []string }{Name: []string{}},
	},

	{
		scenario: "repeated to required leaf",
		from:     struct{ Name []string }{Name: []string{"Luke", "Han", "Leia"}},
		to:       struct{ Name string }{Name: "Luke"},
	},

	{
		scenario: "repeated to optional leaf",
		from:     struct{ Name []string }{Name: []string{"Luke", "Han", "Leia"}},
		to:       struct{ Name *string }{Name: newString("Luke")},
	},

	{
		scenario: "required to optional group",
		from: struct{ Book AddressBook }{
			Book: AddressBook{
				Owner: "Julien Le Dem",
				OwnerPhoneNumbers: []string{
					"555 123 4567",
					"555 666 1337",
				},
				Contacts: []Contact{
					{
						Name:        "Dmitriy Ryaboy",
						PhoneNumber: "555 987 6543",
					},
					{
						Name: "Chris Aniszczyk",
					},
				},
			},
		},
		to: struct{ Book *AddressBook }{
			Book: &AddressBook{
				Owner: "Julien Le Dem",
				OwnerPhoneNumbers: []string{
					"555 123 4567",
					"555 666 1337",
				},
				Contacts: []Contact{
					{
						Name:        "Dmitriy Ryaboy",
						PhoneNumber: "555 987 6543",
					},
					{
						Name: "Chris Aniszczyk",
					},
				},
			},
		},
	},

	{
		scenario: "required to optional group (empty)",
		from: struct{ Book AddressBook }{
			Book: AddressBook{},
		},
		to: struct{ Book *AddressBook }{
			Book: &AddressBook{
				OwnerPhoneNumbers: []string{},
				Contacts:          []Contact{},
			},
		},
	},

	{
		scenario: "optional to required group (null)",
		from: struct{ Book *AddressBook }{
			Book: nil,
		},
		to: struct{ Book AddressBook }{
			Book: AddressBook{
				OwnerPhoneNumbers: []string{},
				Contacts:          []Contact{},
			},
		},
	},

	{
		scenario: "optional to repeated group (null)",
		from:     struct{ Book *AddressBook }{Book: nil},
		to:       struct{ Book []AddressBook }{Book: []AddressBook{}},
	},

	{
		scenario: "optional to repeated optional group (null)",
		from:     struct{ Book *AddressBook }{Book: nil},
		to:       struct{ Book []*AddressBook }{Book: []*AddressBook{}},
	},

	{
		scenario: "handle nested repeated elements during conversion",
		from: AddressBook3{
			Owner: "Julien Le Dem",
			Contacts: []Contact2{
				{
					Name: "Dmitriy Ryaboy",
					PhoneNumbers: []string{
						"555 987 6543",
						"555 123 4567",
					},
					Addresses: []string{},
				},
				{
					Name: "Chris Aniszczyk",
					PhoneNumbers: []string{
						"555 345 8129",
					},
					Addresses: []string{
						"42 Wallaby Way Sydney",
						"1 White House Way",
					},
				},
				{
					Name: "Bob Ross",
					PhoneNumbers: []string{
						"555 198 3628",
					},
					Addresses: []string{
						"::1",
					},
				},
			},
		},
		to: AddressBook4{
			Owner: "Julien Le Dem",
			Contacts: []Contact2{
				{
					Name: "Dmitriy Ryaboy",
					PhoneNumbers: []string{
						"555 987 6543",
						"555 123 4567",
					},
					Addresses: []string{},
				},
				{
					Name: "Chris Aniszczyk",
					PhoneNumbers: []string{
						"555 345 8129",
					},
					Addresses: []string{
						"42 Wallaby Way Sydney",
						"1 White House Way",
					},
				},
				{
					Name: "Bob Ross",
					PhoneNumbers: []string{
						"555 198 3628",
					},
					Addresses: []string{
						"::1",
					},
				},
			},
			Extra: "",
		},
	},

	{
		scenario: "handle nested repeated elements during conversion",
		from: SimpleAddressBook{
			Name: "New Contact",
			Contact: SimpleContact{
				Numbers: []SimpleNumber{
					{
						Number: nil,
					},
					{
						Number: newInt64(1329),
					},
				},
			},
		},
		to: SimpleAddressBook2{
			Name: "New Contact",
			Contact: SimpleContact{
				Numbers: []SimpleNumber{
					{
						Number: nil,
					},
					{
						Number: newInt64(1329),
					},
				},
			},
			Extra: "",
		},
	},
}

// TestConvert runs every conversionTests entry through the full
// deconstruct -> Convert -> reconstruct round trip and compares the result
// against the expected target value.
func TestConvert(t *testing.T) {
	for _, test := range conversionTests {
		t.Run(test.scenario, func(t *testing.T) {
			to := parquet.SchemaOf(test.to)
			from := parquet.SchemaOf(test.from)

			conv, err := parquet.Convert(to, from)
			if err != nil {
				t.Fatal(err)
			}

			row := from.Deconstruct(nil, test.from)
			rowbuf := []parquet.Row{row}
			n, err := conv.Convert(rowbuf)
			if err != nil {
				t.Fatal(err)
			}
			if n != 1 {
				t.Errorf("wrong number of rows got converted: want=1 got=%d", n)
			}
			row = rowbuf[0]

			value := reflect.New(reflect.TypeOf(test.to))
			if err := to.Reconstruct(value.Interface(), row); err != nil {
				t.Fatal(err)
			}

			value = value.Elem()
			if !reflect.DeepEqual(value.Interface(), test.to) {
				t.Errorf("converted value mismatch:\nwant = %#v\ngot = %#v", test.to, value.Interface())
			}
		})
	}
}

// newInt64 returns a pointer to i, used to build optional test fixtures.
func newInt64(i int64) *int64 { return &i }

// newString returns a pointer to s, used to build optional test fixtures.
func newString(s string) *string { return &s }

func TestConvertValue(t *testing.T) {
	now := time.Unix(42, 0)
	ms := now.UnixMilli()
	us := now.UnixMicro()
	ns := now.UnixNano()

	msType := parquet.Timestamp(parquet.Millisecond).Type()
	msVal := parquet.ValueOf(ms)
	if msVal.Int64() != ms {
		t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", ms, msVal.Int64())
	}

	usType := parquet.Timestamp(parquet.Microsecond).Type()
	usVal := parquet.ValueOf(us)
	if usVal.Int64() != us {
		t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", us, usVal.Int64())
	}

	nsType := parquet.Timestamp(parquet.Nanosecond).Type()
	nsVal := parquet.ValueOf(ns)
	if nsVal.Int64() != ns {
		t.Errorf("converted value mismatch:\nwant = 
%+v\ngot = %+v", ns, nsVal.Int64()) } var timestampConversionTests = [...]struct { scenario string fromType parquet.Type fromValue parquet.Value toType parquet.Type toValue parquet.Value }{ { scenario: "true to boolean", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.BooleanType, toValue: parquet.BooleanValue(true), }, { scenario: "true to int32", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.Int32Type, toValue: parquet.Int32Value(1), }, { scenario: "true to int64", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.Int64Type, toValue: parquet.Int64Value(1), }, { scenario: "true to int96", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.Int96Type, toValue: parquet.Int96Value(deprecated.Int96{0: 1}), }, { scenario: "true to float", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.FloatType, toValue: parquet.FloatValue(1), }, { scenario: "true to double", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.FloatType, toValue: parquet.FloatValue(1), }, { scenario: "true to byte array", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.ByteArrayType, toValue: parquet.ByteArrayValue([]byte{1}), }, { scenario: "true to fixed length byte array", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.FixedLenByteArrayType(4), toValue: parquet.FixedLenByteArrayValue([]byte{1, 0, 0, 0}), }, { scenario: "true to string", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(true), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`true`)), }, { scenario: "false to boolean", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.BooleanType, toValue: parquet.BooleanValue(false), }, { scenario: "false to int32", fromType: 
parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.Int32Type, toValue: parquet.Int32Value(0), }, { scenario: "false to int64", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.Int64Type, toValue: parquet.Int64Value(0), }, { scenario: "false to int96", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.Int96Type, toValue: parquet.Int96Value(deprecated.Int96{}), }, { scenario: "false to float", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.FloatType, toValue: parquet.FloatValue(0), }, { scenario: "false to double", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.FloatType, toValue: parquet.FloatValue(0), }, { scenario: "false to byte array", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.ByteArrayType, toValue: parquet.ByteArrayValue([]byte{0}), }, { scenario: "false to fixed length byte array", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.FixedLenByteArrayType(4), toValue: parquet.FixedLenByteArrayValue([]byte{0, 0, 0, 0}), }, { scenario: "false to string", fromType: parquet.BooleanType, fromValue: parquet.BooleanValue(false), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`false`)), }, { scenario: "int32 to true", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(10), toType: parquet.BooleanType, toValue: parquet.BooleanValue(true), }, { scenario: "int32 to false", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(0), toType: parquet.BooleanType, toValue: parquet.BooleanValue(false), }, { scenario: "int32 to int32", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(42), toType: parquet.Int32Type, toValue: parquet.Int32Value(42), }, { scenario: "int32 to int64", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(-21), toType: parquet.Int64Type, 
toValue: parquet.Int64Value(-21), }, { scenario: "int32 to int96", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(123), toType: parquet.Int96Type, toValue: parquet.Int96Value(deprecated.Int96{0: 123}), }, { scenario: "int32 to float", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(9), toType: parquet.FloatType, toValue: parquet.FloatValue(9), }, { scenario: "int32 to double", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(100), toType: parquet.DoubleType, toValue: parquet.DoubleValue(100), }, { scenario: "int32 to byte array", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(1 << 8), toType: parquet.ByteArrayType, toValue: parquet.ByteArrayValue([]byte{0, 1, 0, 0}), }, { scenario: "int32 to fixed length byte array", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(1 << 8), toType: parquet.FixedLenByteArrayType(3), toValue: parquet.FixedLenByteArrayValue([]byte{0, 1, 0}), }, { scenario: "int32 to string", fromType: parquet.Int32Type, fromValue: parquet.Int32Value(12345), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`12345`)), }, { scenario: "int64 to true", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(10), toType: parquet.BooleanType, toValue: parquet.BooleanValue(true), }, { scenario: "int64 to false", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(0), toType: parquet.BooleanType, toValue: parquet.BooleanValue(false), }, { scenario: "int64 to int32", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(-21), toType: parquet.Int32Type, toValue: parquet.Int32Value(-21), }, { scenario: "int64 to int64", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(42), toType: parquet.Int64Type, toValue: parquet.Int64Value(42), }, { scenario: "int64 to int96", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(123), toType: parquet.Int96Type, toValue: parquet.Int96Value(deprecated.Int96{0: 123}), }, { scenario: "int64 to float", fromType: 
parquet.Int64Type, fromValue: parquet.Int64Value(9), toType: parquet.FloatType, toValue: parquet.FloatValue(9), }, { scenario: "int64 to double", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(100), toType: parquet.DoubleType, toValue: parquet.DoubleValue(100), }, { scenario: "int64 to byte array", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(1 << 8), toType: parquet.ByteArrayType, toValue: parquet.ByteArrayValue([]byte{0, 1, 0, 0, 0, 0, 0, 0}), }, { scenario: "int64 to fixed length byte array", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(1 << 8), toType: parquet.FixedLenByteArrayType(3), toValue: parquet.FixedLenByteArrayValue([]byte{0, 1, 0}), }, { scenario: "int64 to string", fromType: parquet.Int64Type, fromValue: parquet.Int64Value(1234567890), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`1234567890`)), }, { scenario: "float to true", fromType: parquet.FloatType, fromValue: parquet.FloatValue(0.1), toType: parquet.BooleanType, toValue: parquet.BooleanValue(true), }, { scenario: "float to false", fromType: parquet.FloatType, fromValue: parquet.FloatValue(0), toType: parquet.BooleanType, toValue: parquet.BooleanValue(false), }, { scenario: "float to int32", fromType: parquet.FloatType, fromValue: parquet.FloatValue(9.9), toType: parquet.Int32Type, toValue: parquet.Int32Value(9), }, { scenario: "float to int64", fromType: parquet.FloatType, fromValue: parquet.FloatValue(-1.5), toType: parquet.Int64Type, toValue: parquet.Int64Value(-1), }, { scenario: "float to float", fromType: parquet.FloatType, fromValue: parquet.FloatValue(1.234), toType: parquet.FloatType, toValue: parquet.FloatValue(1.234), }, { scenario: "float to double", fromType: parquet.FloatType, fromValue: parquet.FloatValue(-0.5), toType: parquet.DoubleType, toValue: parquet.DoubleValue(-0.5), }, { scenario: "float to string", fromType: parquet.FloatType, fromValue: parquet.FloatValue(0.125), toType: parquet.String().Type(), 
toValue: parquet.ByteArrayValue([]byte(`0.125`)), }, { scenario: "double to true", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(0.1), toType: parquet.BooleanType, toValue: parquet.BooleanValue(true), }, { scenario: "double to false", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(0), toType: parquet.BooleanType, toValue: parquet.BooleanValue(false), }, { scenario: "double to int32", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(9.9), toType: parquet.Int32Type, toValue: parquet.Int32Value(9), }, { scenario: "double to int64", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(-1.5), toType: parquet.Int64Type, toValue: parquet.Int64Value(-1), }, { scenario: "double to float", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(1.234), toType: parquet.FloatType, toValue: parquet.FloatValue(1.234), }, { scenario: "double to double", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(-0.5), toType: parquet.DoubleType, toValue: parquet.DoubleValue(-0.5), }, { scenario: "double to string", fromType: parquet.DoubleType, fromValue: parquet.DoubleValue(0.125), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`0.125`)), }, { scenario: "string to true", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`true`)), toType: parquet.BooleanType, toValue: parquet.BooleanValue(true), }, { scenario: "string to false", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`false`)), toType: parquet.BooleanType, toValue: parquet.BooleanValue(false), }, { scenario: "string to int32", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`-21`)), toType: parquet.Int32Type, toValue: parquet.Int32Value(-21), }, { scenario: "string to int64", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`42`)), toType: parquet.Int64Type, toValue: parquet.Int64Value(42), }, { scenario: "string to int96", fromType: 
parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`123`)), toType: parquet.Int96Type, toValue: parquet.Int96Value(deprecated.Int96{0: 123}), }, { scenario: "string to float", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`-0.5`)), toType: parquet.FloatType, toValue: parquet.FloatValue(-0.5), }, { scenario: "string to double", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`0.5`)), toType: parquet.DoubleType, toValue: parquet.DoubleValue(0.5), }, { scenario: "string to byte array", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`ABC`)), toType: parquet.ByteArrayType, toValue: parquet.ByteArrayValue([]byte(`ABC`)), }, { scenario: "string to fixed length byte array", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`99B816772522447EBF76821A7C5ADF65`)), toType: parquet.FixedLenByteArrayType(16), toValue: parquet.FixedLenByteArrayValue([]byte{ 0x99, 0xb8, 0x16, 0x77, 0x25, 0x22, 0x44, 0x7e, 0xbf, 0x76, 0x82, 0x1a, 0x7c, 0x5a, 0xdf, 0x65, }), }, { scenario: "string to string", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`Hello World!`)), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`Hello World!`)), }, { scenario: "string to date", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`1970-01-03`)), toType: parquet.Date().Type(), toValue: parquet.Int32Value(2), }, { scenario: "string to millisecond time", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`12:34:56.789`)), toType: parquet.Time(parquet.Millisecond).Type(), toValue: parquet.Int32Value(45296789), }, { scenario: "string to microsecond time", fromType: parquet.String().Type(), fromValue: parquet.ByteArrayValue([]byte(`12:34:56.789012`)), toType: parquet.Time(parquet.Microsecond).Type(), toValue: parquet.Int64Value(45296789012), }, { scenario: "date to millisecond timestamp", 
fromType: parquet.Date().Type(), fromValue: parquet.Int32Value(19338), toType: parquet.Timestamp(parquet.Millisecond).Type(), toValue: parquet.Int64Value(1670803200000), }, { scenario: "date to microsecond timestamp", fromType: parquet.Date().Type(), fromValue: parquet.Int32Value(19338), toType: parquet.Timestamp(parquet.Microsecond).Type(), toValue: parquet.Int64Value(1670803200000000), }, { scenario: "date to string", fromType: parquet.Date().Type(), fromValue: parquet.Int32Value(18995), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`2022-01-03`)), }, { scenario: "millisecond time to string", fromType: parquet.Time(parquet.Millisecond).Type(), fromValue: parquet.Int32Value(45296789), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`12:34:56.789`)), }, { scenario: "microsecond time to string", fromType: parquet.Time(parquet.Microsecond).Type(), fromValue: parquet.Int64Value(45296789012), toType: parquet.String().Type(), toValue: parquet.ByteArrayValue([]byte(`12:34:56.789012`)), }, { scenario: "millisecond timestamp to date", fromType: parquet.Timestamp(parquet.Millisecond).Type(), fromValue: parquet.Int64Value(1670888613000), toType: parquet.Date().Type(), toValue: parquet.Int32Value(19338), }, { scenario: "microsecond timestamp to date", fromType: parquet.Timestamp(parquet.Microsecond).Type(), fromValue: parquet.Int64Value(1670888613000123), toType: parquet.Date().Type(), toValue: parquet.Int32Value(19338), }, { scenario: "millisecond timestamp to millisecond time", fromType: parquet.Timestamp(parquet.Millisecond).Type(), fromValue: parquet.Int64Value(1670888613123), toType: parquet.Time(parquet.Millisecond).Type(), toValue: parquet.Int32Value(85413123), }, { scenario: "millisecond timestamp to micronsecond time", fromType: parquet.Timestamp(parquet.Millisecond).Type(), fromValue: parquet.Int64Value(1670888613123), toType: parquet.Time(parquet.Microsecond).Type(), toValue: parquet.Int64Value(85413123000), }, { 
scenario: "microsecond timestamp to millisecond time", fromType: parquet.Timestamp(parquet.Microsecond).Type(), fromValue: parquet.Int64Value(1670888613123456), toType: parquet.Time(parquet.Millisecond).Type(), toValue: parquet.Int32Value(85413123), }, { scenario: "microsecond timestamp to micronsecond time", fromType: parquet.Timestamp(parquet.Microsecond).Type(), fromValue: parquet.Int64Value(1670888613123456), toType: parquet.Time(parquet.Microsecond).Type(), toValue: parquet.Int64Value(85413123456), }, { scenario: "micros to nanos", fromType: usType, fromValue: usVal, toType: nsType, toValue: parquet.Int64Value(ns), }, { scenario: "millis to nanos", fromType: msType, fromValue: msVal, toType: nsType, toValue: parquet.Int64Value(ns), }, { scenario: "nanos to micros", fromType: nsType, fromValue: nsVal, toType: usType, toValue: parquet.Int64Value(us), }, { scenario: "nanos to nanos", fromType: nsType, fromValue: nsVal, toType: nsType, toValue: parquet.Int64Value(ns), }, { scenario: "int64 to nanos", fromType: parquet.Int64Type, fromValue: nsVal, toType: nsType, toValue: parquet.Int64Value(ns), }, { scenario: "int64 to int64", fromType: parquet.Int64Type, fromValue: nsVal, toType: parquet.Int64Type, toValue: parquet.Int64Value(ns), }, } for _, test := range timestampConversionTests { t.Run(test.scenario, func(t *testing.T) { // Set levels to ensure that they are retained by the conversion. from := test.fromValue.Level(1, 2, 3) want := test.toValue.Level(1, 2, 3) got, err := test.toType.ConvertValue(from, test.fromType) if err != nil { t.Fatal(err) } if !parquet.DeepEqual(want, got) { t.Errorf("converted value mismatch:\nwant = %+v\ngot = %+v", want, got) } }) } } ================================================ FILE: dedupe.go ================================================ package parquet // DedupeRowReader constructs a row reader which drops duplicated consecutive // rows, according to the comparator function passed as argument. 
// // If the underlying reader produces a sequence of rows sorted by the same // comparison predicate, the output is guaranteed to produce unique rows only. func DedupeRowReader(reader RowReader, compare func(Row, Row) int) RowReader { return &dedupeRowReader{reader: reader, compare: compare} } type dedupeRowReader struct { reader RowReader compare func(Row, Row) int dedupe } func (d *dedupeRowReader) ReadRows(rows []Row) (int, error) { for { n, err := d.reader.ReadRows(rows) n = d.deduplicate(rows[:n], d.compare) if n > 0 || err != nil { return n, err } } } // DedupeRowWriter constructs a row writer which drops duplicated consecutive // rows, according to the comparator function passed as argument. // // If the writer is given a sequence of rows sorted by the same comparison // predicate, the output is guaranteed to contain unique rows only. func DedupeRowWriter(writer RowWriter, compare func(Row, Row) int) RowWriter { return &dedupeRowWriter{writer: writer, compare: compare} } type dedupeRowWriter struct { writer RowWriter compare func(Row, Row) int dedupe rows []Row } func (d *dedupeRowWriter) WriteRows(rows []Row) (int, error) { // We need to make a copy because we cannot modify the rows slice received // as argument to respect the RowWriter contract. d.rows = append(d.rows[:0], rows...) defer func() { for i := range d.rows { d.rows[i] = Row{} } }() if n := d.deduplicate(d.rows, d.compare); n > 0 { w, err := d.writer.WriteRows(d.rows[:n]) if err != nil { return w, err } } // Return the number of rows received instead of the number of deduplicated // rows actually written to the underlying writer because we have to repsect // the RowWriter contract. 
return len(rows), nil } type dedupe struct { alloc rowAllocator lastRow Row uniq []Row dupe []Row } func (d *dedupe) reset() { d.alloc.reset() d.lastRow = d.lastRow[:0] } func (d *dedupe) deduplicate(rows []Row, compare func(Row, Row) int) int { defer func() { for i := range d.uniq { d.uniq[i] = Row{} } for i := range d.dupe { d.dupe[i] = Row{} } d.uniq = d.uniq[:0] d.dupe = d.dupe[:0] }() lastRow := d.lastRow for _, row := range rows { if len(lastRow) != 0 && compare(row, lastRow) == 0 { d.dupe = append(d.dupe, row) } else { lastRow = row d.uniq = append(d.uniq, row) } } rows = rows[:0] rows = append(rows, d.uniq...) rows = append(rows, d.dupe...) d.alloc.reset() d.alloc.capture(lastRow) d.lastRow = append(d.lastRow[:0], lastRow...) return len(d.uniq) } ================================================ FILE: dedupe_test.go ================================================ //go:build go1.18 package parquet_test import ( "sort" "testing" "github.com/segmentio/parquet-go" ) func TestDedupeRowReader(t *testing.T) { type Row struct { Value int32 `parquet:"value"` } rows := make([]Row, 1000) for i := range rows { rows[i].Value = int32(i / 3) } dedupeMap := make(map[Row]struct{}, len(rows)) for _, row := range rows { dedupeMap[row] = struct{}{} } dedupeRows := make([]Row, 0, len(dedupeMap)) for row := range dedupeMap { dedupeRows = append(dedupeRows, row) } sort.Slice(dedupeRows, func(i, j int) bool { return dedupeRows[i].Value < dedupeRows[j].Value }) buffer1 := parquet.NewRowBuffer[Row]() buffer1.Write(rows) buffer1Rows := buffer1.Rows() defer buffer1Rows.Close() buffer2 := parquet.NewRowBuffer[Row]() _, err := parquet.CopyRows(buffer2, parquet.DedupeRowReader(buffer1Rows, buffer1.Schema().Comparator(parquet.Ascending("value")), ), ) if err != nil { t.Fatal(err) } reader := parquet.NewGenericRowGroupReader[Row](buffer2) defer reader.Close() n, _ := reader.Read(rows) assertRowsEqual(t, dedupeRows, rows[:n]) } func TestDedupeRowWriter(t *testing.T) { type Row struct { 
// Int96 is an implementation of the deprecated INT96 parquet type.
type Int96 [3]uint32

// Int32ToInt96 converts a int32 value to a Int96.
func Int32ToInt96(value int32) Int96 {
	var ext uint32
	if value < 0 {
		// Sign-extend negative values into the two upper words.
		ext = 0xFFFFFFFF
	}
	return Int96{uint32(value), ext, ext}
}

// Int64ToInt96 converts a int64 value to Int96.
func Int64ToInt96(value int64) Int96 {
	var ext uint32
	if value < 0 {
		// Sign-extend negative values into the upper word.
		ext = 0xFFFFFFFF
	}
	return Int96{uint32(value), uint32(value >> 32), ext}
}

// IsZero returns true if i is the zero-value.
func (i Int96) IsZero() bool { return i == Int96{} }

// Negative returns true if i is a negative value.
func (i Int96) Negative() bool { return (i[2] >> 31) != 0 }

// Less returns true if i < j.
//
// The method implements a signed comparison between the two operands.
func (i Int96) Less(j Int96) bool {
	if in, jn := i.Negative(), j.Negative(); in != jn {
		// Operands of different signs: the negative one is the smaller.
		return in
	}
	// Same sign: an unsigned word-wise comparison from the most to the least
	// significant word yields the signed ordering (two's complement).
	for k := 2; k >= 0; k-- {
		if i[k] != j[k] {
			return i[k] < j[k]
		}
	}
	return false
}

// Int converts i to a big.Int representation.
func (i Int96) Int() *big.Int {
	// Assemble the upper 64 bits (sign carried by the int64 conversion), then
	// shift them up and merge the low word; big.Int Or uses two's complement
	// semantics so this is correct for negative values as well.
	z := big.NewInt(int64(i[2])<<32 | int64(i[1]))
	z.Lsh(z, 32)
	return z.Or(z, big.NewInt(int64(i[0])))
}

// Int32 converts i to a int32, potentially truncating the value.
func (i Int96) Int32() int32 { return int32(i[0]) }

// Int64 converts i to a int64, potentially truncating the value.
func (i Int96) Int64() int64 { return int64(i[1])<<32 | int64(i[0]) }

// String returns a string representation of i.
func (i Int96) String() string { return i.Int().String() }

// Len returns the minimum length in bits required to store the value of i.
func (i Int96) Len() int {
	if i[2] != 0 {
		return 64 + bits.Len32(i[2])
	}
	if i[1] != 0 {
		return 32 + bits.Len32(i[1])
	}
	return bits.Len32(i[0])
}

// Int96ToBytes converts the slice of Int96 values to a slice of bytes sharing
// the same backing array.
func Int96ToBytes(values []Int96) []byte {
	return unsafe.Slice(*(**byte)(unsafe.Pointer(&values)), 12*len(values))
}

// BytesToInt96 converts the byte slice passed as argument to a slice of Int96
// sharing the same backing array.
//
// When the number of bytes in the input is not a multiple of 12, the function
// truncates it in the returned slice.
func BytesToInt96(buf []byte) []Int96 {
	return unsafe.Slice(*(**Int96)(unsafe.Pointer(&buf)), len(buf)/12)
}

// MaxLenInt96 returns the largest bit length of the values in data.
func MaxLenInt96(data []Int96) int {
	maxLen := 0
	for _, v := range data {
		if n := v.Len(); n > maxLen {
			maxLen = n
		}
	}
	return maxLen
}

// MinInt96 returns the smallest value in data, or the zero-value when data is
// empty.
func MinInt96(data []Int96) (min Int96) {
	if len(data) == 0 {
		return min
	}
	min = data[0]
	for _, v := range data[1:] {
		if v.Less(min) {
			min = v
		}
	}
	return min
}

// MaxInt96 returns the largest value in data, or the zero-value when data is
// empty.
func MaxInt96(data []Int96) (max Int96) {
	if len(data) == 0 {
		return max
	}
	max = data[0]
	for _, v := range data[1:] {
		if max.Less(v) {
			max = v
		}
	}
	return max
}

// MinMaxInt96 returns both the smallest and largest values in data in a single
// pass, or zero-values when data is empty.
func MinMaxInt96(data []Int96) (min, max Int96) {
	if len(data) == 0 {
		return min, max
	}
	min, max = data[0], data[0]
	for _, v := range data[1:] {
		if v.Less(min) {
			min = v
		}
		if max.Less(v) {
			max = v
		}
	}
	return min, max
}

// OrderOfInt96 reports the ordering of data: +1 for ascending, -1 for
// descending, and 0 when the values are unordered or there are fewer than
// two of them.
func OrderOfInt96(data []Int96) int {
	if len(data) > 1 {
		switch {
		case int96AreInAscendingOrder(data):
			return +1
		case int96AreInDescendingOrder(data):
			return -1
		}
	}
	return 0
}

func int96AreInAscendingOrder(data []Int96) bool {
	for i := 1; i < len(data); i++ {
		if data[i].Less(data[i-1]) {
			return false
		}
	}
	return true
}

func int96AreInDescendingOrder(data []Int96) bool {
	for i := 1; i < len(data); i++ {
		if data[i-1].Less(data[i]) {
			return false
		}
	}
	return true
}
0xFFFFFFFF, 2: 0xFFFFFFFF}, // -1 less: false, }, { i: deprecated.Int96{0: 0xFFFFFFFF, 1: 0xFFFFFFFF, 2: 0xFFFFFFFF}, // -1 j: deprecated.Int96{0: 0xFFFFFFFF, 1: 0xFFFFFFFF, 2: 0xFFFFFFFF}, // -1 less: false, }, { i: deprecated.Int96{0: 0xFFFFFFFF, 1: 0xFFFFFFFF, 2: 0xFFFFFFFF}, // -1 j: deprecated.Int96{0: 0xFFFFFFFE, 1: 0xFFFFFFFF, 2: 0xFFFFFFFF}, // -2 less: false, }, { i: deprecated.Int96{0: 0xFFFFFFFE, 1: 0xFFFFFFFF, 2: 0xFFFFFFFF}, // -2 j: deprecated.Int96{0: 0xFFFFFFFF, 1: 0xFFFFFFFF, 2: 0xFFFFFFFF}, // -1 less: true, }, } for _, test := range tests { scenario := "" if test.less { scenario = fmt.Sprintf("%s<%s", test.i, test.j) } else { scenario = fmt.Sprintf("%s>=%s", test.i, test.j) } t.Run(scenario, func(t *testing.T) { if test.i.Less(test.j) != test.less { t.Error("FAIL") } if test.less { if test.j.Less(test.i) { t.Error("FAIL (inverse)") } } }) } } func TestMaxLenInt96(t *testing.T) { for _, test := range []struct { data []deprecated.Int96 maxlen int }{ { data: nil, maxlen: 0, }, { data: []deprecated.Int96{{}, {}, {}, {}, {}}, maxlen: 0, }, { data: []deprecated.Int96{{0: 0x01}, {0: 0xFF}, {1: 0x02}, {0: 0xF0}}, maxlen: 34, }, } { t.Run("", func(t *testing.T) { if maxlen := deprecated.MaxLenInt96(test.data); maxlen != test.maxlen { t.Errorf("want=%d got=%d", test.maxlen, maxlen) } }) } } ================================================ FILE: deprecated/parquet.go ================================================ package deprecated // DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. // ConvertedType is superseded by LogicalType. This enum should not be extended. // // See LogicalTypes.md for conversion between ConvertedType and LogicalType. 
// ConvertedType is the deprecated enumeration of logical type annotations used
// by frameworks (e.g. hive, pig) before LogicalType superseded it. This enum
// should not be extended.
//
// See LogicalTypes.md for conversion between ConvertedType and LogicalType.
type ConvertedType int32

const (
	// UTF8 annotates a BYTE_ARRAY that actually contains UTF8 encoded chars.
	UTF8 ConvertedType = 0

	// Map annotates a map converted as an optional field containing a
	// repeated key/value pair.
	Map ConvertedType = 1

	// MapKeyValue annotates a key/value pair converted into a group of two
	// fields.
	MapKeyValue ConvertedType = 2

	// List annotates a list converted into an optional field containing a
	// repeated field for its values.
	List ConvertedType = 3

	// Enum annotates an enum converted into a binary field.
	Enum ConvertedType = 4

	// Decimal annotates a decimal value stored in binary or fixed primitive
	// types: the underlying byte array holds the unscaled value encoded as
	// two's complement in big-endian byte order, and the decimal is
	// value * 10^{-scale}. The (maximum) precision and the scale are carried
	// by the SchemaElement.
	Decimal ConvertedType = 5

	// Date annotates days since the Unix epoch, stored as INT32.
	Date ConvertedType = 6

	// TimeMillis annotates the total number of milliseconds since midnight,
	// stored as INT32.
	TimeMillis ConvertedType = 7

	// TimeMicros annotates the total number of microseconds since midnight,
	// stored as INT64.
	TimeMicros ConvertedType = 8

	// TimestampMillis annotates a date/time recorded as milliseconds since
	// the Unix epoch, stored as INT64.
	TimestampMillis ConvertedType = 9

	// TimestampMicros annotates a date/time recorded as microseconds since
	// the Unix epoch, stored as INT64.
	TimestampMicros ConvertedType = 10

	// Uint8 through Uint64 annotate unsigned integer values; the name gives
	// the maximum number of meaningful data bits. 8, 16 and 32 bit values
	// are stored as INT32, 64 bit values as INT64.
	Uint8  ConvertedType = 11
	Uint16 ConvertedType = 12
	Uint32 ConvertedType = 13
	Uint64 ConvertedType = 14

	// Int8 through Int64 annotate signed integer values; the name gives the
	// maximum number of meaningful data bits. 8, 16 and 32 bit values are
	// stored as INT32, 64 bit values as INT64.
	Int8  ConvertedType = 15
	Int16 ConvertedType = 16
	Int32 ConvertedType = 17
	Int64 ConvertedType = 18

	// Json annotates a JSON document embedded within a single UTF8 column.
	Json ConvertedType = 19

	// Bson annotates a BSON document embedded within a single BINARY column.
	Bson ConvertedType = 20

	// Interval annotates a FIXED_LEN_BYTE_ARRAY of length 12 composed of
	// three little endian unsigned integers storing the months, days, and
	// milliseconds components of a duration that is independent of any
	// particular timezone or date.
	Interval ConvertedType = 21
)
Len() int // Returns the dictionary value at the given index. Index(index int32) Value // Inserts values from the second slice to the dictionary and writes the // indexes at which each value was inserted to the first slice. // // The method panics if the length of the indexes slice is smaller than the // length of the values slice. Insert(indexes []int32, values []Value) // Given an array of dictionary indexes, lookup the values into the array // of values passed as second argument. // // The method panics if len(indexes) > len(values), or one of the indexes // is negative or greater than the highest index in the dictionary. Lookup(indexes []int32, values []Value) // Returns the min and max values found in the given indexes. Bounds(indexes []int32) (min, max Value) // Resets the dictionary to its initial state, removing all values. Reset() // Returns a Page representing the content of the dictionary. // // The returned page shares the underlying memory of the buffer, it remains // valid to use until the dictionary's Reset method is called. Page() Page // See ColumnBuffer.writeValues for details on the use of unexported methods // on interfaces. insert(indexes []int32, rows sparse.Array) //lookup(indexes []int32, rows sparse.Array) } func checkLookupIndexBounds(indexes []int32, rows sparse.Array) { if rows.Len() < len(indexes) { panic("dictionary lookup with more indexes than values") } } // The boolean dictionary always contains two values for true and false. type booleanDictionary struct { booleanPage // There are only two possible values for booleans, false and true. // Rather than using a Go map, we track the indexes of each values // in an array of two 32 bits integers. When inserting values in the // dictionary, we ensure that an index exist for each boolean value, // then use the value 0 or 1 (false or true) to perform a lookup in // the dictionary's map. 
table [2]int32 } func newBooleanDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *booleanDictionary { indexOfFalse, indexOfTrue, values := int32(-1), int32(-1), data.Boolean() for i := int32(0); i < numValues && indexOfFalse < 0 && indexOfTrue < 0; i += 8 { v := values[i] if v != 0x00 { indexOfTrue = i + int32(bits.TrailingZeros8(v)) } if v != 0xFF { indexOfFalse = i + int32(bits.TrailingZeros8(^v)) } } return &booleanDictionary{ booleanPage: booleanPage{ typ: typ, bits: values[:bitpack.ByteCount(uint(numValues))], numValues: numValues, columnIndex: ^columnIndex, }, table: [2]int32{ 0: indexOfFalse, 1: indexOfTrue, }, } } func (d *booleanDictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *booleanDictionary) Len() int { return int(d.numValues) } func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int(i)) } func (d *booleanDictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) { _ = indexes[:rows.Len()] if d.table[0] < 0 { d.table[0] = d.numValues d.numValues++ d.bits = plain.AppendBoolean(d.bits, int(d.table[0]), false) } if d.table[1] < 0 { d.table[1] = d.numValues d.numValues++ d.bits = plain.AppendBoolean(d.bits, int(d.table[1]), true) } values := rows.Uint8Array() dict := d.table for i := 0; i < rows.Len(); i++ { v := values.Index(i) & 1 indexes[i] = dict[v] } } func (d *booleanDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(false) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { *(*bool)(rows.Index(i)) = d.index(j) } } func (d *booleanDictionary) 
Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { hasFalse, hasTrue := false, false for _, i := range indexes { v := d.index(i) if v { hasTrue = true } else { hasFalse = true } if hasTrue && hasFalse { break } } min = d.makeValue(!hasFalse) max = d.makeValue(hasTrue) } return min, max } func (d *booleanDictionary) Reset() { d.bits = d.bits[:0] d.offset = 0 d.numValues = 0 d.table = [2]int32{-1, -1} } func (d *booleanDictionary) Page() Page { return &d.booleanPage } type int32Dictionary struct { int32Page table *hashprobe.Int32Table } func newInt32Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *int32Dictionary { return &int32Dictionary{ int32Page: int32Page{ typ: typ, values: data.Int32()[:numValues], columnIndex: ^columnIndex, }, } } func (d *int32Dictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *int32Dictionary) Len() int { return len(d.values) } func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *int32Dictionary) index(i int32) int32 { return d.values[i] } func (d *int32Dictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *int32Dictionary) init(indexes []int32) { d.table = hashprobe.NewInt32Table(len(d.values), hashprobeTableMaxLoad) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) { // Iterating over the input in chunks helps keep relevant data in CPU // caches when a large number of values are inserted into the dictionary with // a single method call. // // Without this chunking, memory areas from the head of the indexes and // values arrays end up being evicted from CPU caches as the probing // operation iterates through the array. 
The subsequent scan of the indexes // required to determine which values must be inserted into the page then // stalls on retrieving data from main memory. // // We measured as much as ~37% drop in throughput when disabling the // chunking, and did not observe any penalties from having it on smaller // inserts. const chunkSize = insertsTargetCacheFootprint / 4 if d.table == nil { d.init(indexes) } values := rows.Int32Array() for i := 0; i < values.Len(); i += chunkSize { j := min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *int32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *int32Dictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d *int32Dictionary) Page() Page { return &d.int32Page } type int64Dictionary struct { int64Page table *hashprobe.Int64Table } func newInt64Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *int64Dictionary { return &int64Dictionary{ int64Page: int64Page{ typ: typ, values: data.Int64()[:numValues], columnIndex: ^columnIndex, }, } } func (d *int64Dictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *int64Dictionary) Len() int { return len(d.values) } func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *int64Dictionary) index(i int32) int64 { return d.values[i] } func (d *int64Dictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, 
unsafe.Offsetof(model.u64))) } func (d *int64Dictionary) init(indexes []int32) { d.table = hashprobe.NewInt64Table(len(d.values), hashprobeTableMaxLoad) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) { const chunkSize = insertsTargetCacheFootprint / 8 if d.table == nil { d.init(indexes) } values := rows.Int64Array() for i := 0; i < values.Len(); i += chunkSize { j := min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *int64Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *int64Dictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d *int64Dictionary) Page() Page { return &d.int64Page } type int96Dictionary struct { int96Page hashmap map[deprecated.Int96]int32 } func newInt96Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *int96Dictionary { return &int96Dictionary{ int96Page: int96Page{ typ: typ, values: data.Int96()[:numValues], columnIndex: ^columnIndex, }, } } func (d *int96Dictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *int96Dictionary) Len() int { return len(d.values) } func (d *int96Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *int96Dictionary) index(i int32) deprecated.Int96 { return d.values[i] } func (d *int96Dictionary) Insert(indexes []int32, 
values []Value) { d.insertValues(indexes, len(values), func(i int) deprecated.Int96 { return values[i].Int96() }) } func (d *int96Dictionary) insert(indexes []int32, rows sparse.Array) { d.insertValues(indexes, rows.Len(), func(i int) deprecated.Int96 { return *(*deprecated.Int96)(rows.Index(i)) }) } func (d *int96Dictionary) insertValues(indexes []int32, count int, valueAt func(int) deprecated.Int96) { _ = indexes[:count] if d.hashmap == nil { d.hashmap = make(map[deprecated.Int96]int32, len(d.values)) for i, v := range d.values { d.hashmap[v] = int32(i) } } for i := 0; i < count; i++ { value := valueAt(i) index, exists := d.hashmap[value] if !exists { index = int32(len(d.values)) d.values = append(d.values, value) d.hashmap[value] = index } indexes[i] = index } } func (d *int96Dictionary) Lookup(indexes []int32, values []Value) { for i, j := range indexes { values[i] = d.Index(j) } } func (d *int96Dictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue := d.index(indexes[0]) maxValue := minValue for _, i := range indexes[1:] { value := d.index(i) switch { case value.Less(minValue): minValue = value case maxValue.Less(value): maxValue = value } } min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *int96Dictionary) Reset() { d.values = d.values[:0] d.hashmap = nil } func (d *int96Dictionary) Page() Page { return &d.int96Page } type floatDictionary struct { floatPage table *hashprobe.Float32Table } func newFloatDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *floatDictionary { return &floatDictionary{ floatPage: floatPage{ typ: typ, values: data.Float()[:numValues], columnIndex: ^columnIndex, }, } } func (d *floatDictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *floatDictionary) Len() int { return len(d.values) } func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *floatDictionary) index(i int32) float32 { return 
d.values[i] } func (d *floatDictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *floatDictionary) init(indexes []int32) { d.table = hashprobe.NewFloat32Table(len(d.values), hashprobeTableMaxLoad) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) { const chunkSize = insertsTargetCacheFootprint / 4 if d.table == nil { d.init(indexes) } values := rows.Float32Array() for i := 0; i < values.Len(); i += chunkSize { j := min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *floatDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *floatDictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d *floatDictionary) Page() Page { return &d.floatPage } type doubleDictionary struct { doublePage table *hashprobe.Float64Table } func newDoubleDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *doubleDictionary { return &doubleDictionary{ doublePage: doublePage{ typ: typ, values: data.Double()[:numValues], columnIndex: ^columnIndex, }, } } func (d *doubleDictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *doubleDictionary) Len() int { return len(d.values) } func (d *doubleDictionary) Index(i int32) Value { return 
d.makeValue(d.index(i)) } func (d *doubleDictionary) index(i int32) float64 { return d.values[i] } func (d *doubleDictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *doubleDictionary) init(indexes []int32) { d.table = hashprobe.NewFloat64Table(len(d.values), hashprobeTableMaxLoad) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) { const chunkSize = insertsTargetCacheFootprint / 8 if d.table == nil { d.init(indexes) } values := rows.Float64Array() for i := 0; i < values.Len(); i += chunkSize { j := min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *doubleDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *doubleDictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d *doubleDictionary) Page() Page { return &d.doublePage } type byteArrayDictionary struct { byteArrayPage table map[string]int32 alloc allocator } func newByteArrayDictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *byteArrayDictionary { values, offsets := data.ByteArray() // The first offset must always be zero, and the last offset is the length // of the values in bytes. 
// // As an optimization we make the assumption that the backing array of the // offsets slice belongs to the dictionary. switch { case cap(offsets) == 0: offsets = make([]uint32, 1, 8) case len(offsets) == 0: offsets = append(offsets[:0], 0) } return &byteArrayDictionary{ byteArrayPage: byteArrayPage{ typ: typ, values: values, offsets: offsets, columnIndex: ^columnIndex, }, } } func (d *byteArrayDictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *byteArrayDictionary) Len() int { return d.len() } func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(int(i))) } func (d *byteArrayDictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) } func (d *byteArrayDictionary) init() { numValues := d.len() d.table = make(map[string]int32, numValues) for i := 0; i < numValues; i++ { d.table[string(d.index(i))] = int32(len(d.table)) } } func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Array) { if d.table == nil { d.init() } values := rows.StringArray() for i := range indexes { value := values.Index(i) index, exists := d.table[value] if !exists { value = d.alloc.copyString(value) index = int32(len(d.table)) d.table[value] = index d.values = append(d.values, value...) 
d.offsets = append(d.offsets, uint32(len(d.values))) } indexes[i] = index } } func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) } func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { base := d.index(int(indexes[0])) minValue := unsafecast.BytesToString(base) maxValue := minValue values := [64]string{} for i := 1; i < len(indexes); i += len(values) { n := len(indexes) - i if n > len(values) { n = len(values) } j := i + n d.lookupString(indexes[i:j:j], makeArrayString(values[:n:n])) for _, value := range values[:n:n] { switch { case value < minValue: minValue = value case value > maxValue: maxValue = value } } } min = d.makeValueString(minValue) max = d.makeValueString(maxValue) } return min, max } func (d *byteArrayDictionary) Reset() { d.offsets = d.offsets[:1] d.values = d.values[:0] for k := range d.table { delete(d.table, k) } d.alloc.reset() } func (d *byteArrayDictionary) Page() Page { return &d.byteArrayPage } type fixedLenByteArrayDictionary struct { fixedLenByteArrayPage hashmap map[string]int32 } func newFixedLenByteArrayDictionary(typ Type, columnIndex int16, numValues int32, values encoding.Values) *fixedLenByteArrayDictionary { data, size := values.FixedLenByteArray() return &fixedLenByteArrayDictionary{ fixedLenByteArrayPage: fixedLenByteArrayPage{ typ: typ, size: size, data: data, columnIndex: ^columnIndex, }, } } func (d *fixedLenByteArrayDictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *fixedLenByteArrayDictionary) Len() int { return len(d.data) / d.size } func (d *fixedLenByteArrayDictionary) Index(i int32) Value { return d.makeValueBytes(d.index(i)) } func (d *fixedLenByteArrayDictionary) index(i int32) []byte { j := (int(i) + 0) * d.size k := (int(i) + 1) * d.size return d.data[j:k:k] } func (d *fixedLenByteArrayDictionary) 
Insert(indexes []int32, values []Value) { d.insertValues(indexes, len(values), func(i int) *byte { return values[i].ptr }) } func (d *fixedLenByteArrayDictionary) insert(indexes []int32, rows sparse.Array) { d.insertValues(indexes, rows.Len(), func(i int) *byte { return (*byte)(rows.Index(i)) }) } func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, count int, valueAt func(int) *byte) { _ = indexes[:count] if d.hashmap == nil { d.hashmap = make(map[string]int32, cap(d.data)/d.size) for i, j := 0, int32(0); i < len(d.data); i += d.size { d.hashmap[string(d.data[i:i+d.size])] = j j++ } } for i := 0; i < count; i++ { value := unsafe.Slice(valueAt(i), d.size) index, exists := d.hashmap[string(value)] if !exists { index = int32(d.Len()) start := len(d.data) d.data = append(d.data, value...) d.hashmap[string(d.data[start:])] = index } indexes[i] = index } } func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) } func (d *fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { base := d.index(indexes[0]) minValue := unsafecast.BytesToString(base) maxValue := minValue values := [64]string{} for i := 1; i < len(indexes); i += len(values) { n := len(indexes) - i if n > len(values) { n = len(values) } j := i + n d.lookupString(indexes[i:j:j], makeArrayString(values[:n:n])) for _, value := range values[:n:n] { switch { case value < minValue: minValue = value case value > maxValue: maxValue = value } } } min = d.makeValueString(minValue) max = d.makeValueString(maxValue) } return min, max } func (d *fixedLenByteArrayDictionary) Reset() { d.data = d.data[:0] d.hashmap = nil } func (d *fixedLenByteArrayDictionary) Page() Page { return &d.fixedLenByteArrayPage } type uint32Dictionary struct { uint32Page table *hashprobe.Uint32Table } func newUint32Dictionary(typ Type, 
columnIndex int16, numValues int32, data encoding.Values) *uint32Dictionary { return &uint32Dictionary{ uint32Page: uint32Page{ typ: typ, values: data.Uint32()[:numValues], columnIndex: ^columnIndex, }, } } func (d *uint32Dictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *uint32Dictionary) Len() int { return len(d.values) } func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] } func (d *uint32Dictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *uint32Dictionary) init(indexes []int32) { d.table = hashprobe.NewUint32Table(len(d.values), hashprobeTableMaxLoad) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) { const chunkSize = insertsTargetCacheFootprint / 4 if d.table == nil { d.init(indexes) } values := rows.Uint32Array() for i := 0; i < values.Len(); i += chunkSize { j := min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *uint32Dictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d *uint32Dictionary) Page() Page { return &d.uint32Page } type uint64Dictionary struct { 
uint64Page table *hashprobe.Uint64Table } func newUint64Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *uint64Dictionary { return &uint64Dictionary{ uint64Page: uint64Page{ typ: typ, values: data.Uint64()[:numValues], columnIndex: ^columnIndex, }, } } func (d *uint64Dictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *uint64Dictionary) Len() int { return len(d.values) } func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] } func (d *uint64Dictionary) Insert(indexes []int32, values []Value) { model := Value{} d.insert(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *uint64Dictionary) init(indexes []int32) { d.table = hashprobe.NewUint64Table(len(d.values), hashprobeTableMaxLoad) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) { const chunkSize = insertsTargetCacheFootprint / 8 if d.table == nil { d.init(indexes) } values := rows.Uint64Array() for i := 0; i < values.Len(); i += chunkSize { j := min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValue(0) memsetValues(values, model) d.lookup(indexes, makeArrayValue(values, unsafe.Offsetof(model.u64))) } func (d *uint64Dictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *uint64Dictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d 
*uint64Dictionary) Page() Page { return &d.uint64Page } type be128Dictionary struct { be128Page table *hashprobe.Uint128Table } func newBE128Dictionary(typ Type, columnIndex int16, numValues int32, data encoding.Values) *be128Dictionary { return &be128Dictionary{ be128Page: be128Page{ typ: typ, values: data.Uint128()[:numValues], columnIndex: ^columnIndex, }, } } func (d *be128Dictionary) Type() Type { return newIndexedType(d.typ, d) } func (d *be128Dictionary) Len() int { return len(d.values) } func (d *be128Dictionary) Index(i int32) Value { return d.makeValue(d.index(i)) } func (d *be128Dictionary) index(i int32) *[16]byte { return &d.values[i] } func (d *be128Dictionary) Insert(indexes []int32, values []Value) { _ = indexes[:len(values)] for _, v := range values { if v.kind != ^int8(FixedLenByteArray) { panic("values inserted in BE128 dictionary must be of type BYTE_ARRAY") } if v.u64 != 16 { panic("values inserted in BE128 dictionary must be of length 16") } } if d.table == nil { d.init(indexes) } const chunkSize = insertsTargetCacheFootprint / 16 var buffer [chunkSize][16]byte for i := 0; i < len(values); i += chunkSize { j := min(chunkSize+i, len(values)) n := min(chunkSize, len(values)-i) probe := buffer[:n:n] writePointersBE128(probe, makeArrayValue(values[i:j], unsafe.Offsetof(values[i].ptr))) if d.table.Probe(probe, indexes[i:j:j]) > 0 { for k, v := range probe { if indexes[i+k] == int32(len(d.values)) { d.values = append(d.values, v) } } } } } func (d *be128Dictionary) init(indexes []int32) { d.table = hashprobe.NewUint128Table(len(d.values), 0.75) n := min(len(d.values), len(indexes)) for i := 0; i < len(d.values); i += n { j := min(i+n, len(d.values)) d.table.Probe(d.values[i:j:j], indexes[:n:n]) } } func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) { const chunkSize = insertsTargetCacheFootprint / 16 if d.table == nil { d.init(indexes) } values := rows.Uint128Array() for i := 0; i < values.Len(); i += chunkSize { j := 
min(i+chunkSize, values.Len()) if d.table.ProbeArray(values.Slice(i, j), indexes[i:j:j]) > 0 { for k, index := range indexes[i:j] { if index == int32(len(d.values)) { d.values = append(d.values, values.Index(i+k)) } } } } } func (d *be128Dictionary) Lookup(indexes []int32, values []Value) { model := d.makeValueString("") memsetValues(values, model) d.lookupString(indexes, makeArrayValue(values, unsafe.Offsetof(model.ptr))) } func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) { if len(indexes) > 0 { minValue, maxValue := d.bounds(indexes) min = d.makeValue(minValue) max = d.makeValue(maxValue) } return min, max } func (d *be128Dictionary) Reset() { d.values = d.values[:0] if d.table != nil { d.table.Reset() } } func (d *be128Dictionary) Page() Page { return &d.be128Page } // indexedType is a wrapper around a Type value which overrides object // constructors to use indexed versions referencing values in the dictionary // instead of storing plain values. type indexedType struct { Type dict Dictionary } func newIndexedType(typ Type, dict Dictionary) *indexedType { return &indexedType{Type: typ, dict: dict} } func (t *indexedType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return newIndexedColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues)) } func (t *indexedType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return newIndexedPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data) } // indexedPage is an implementation of the Page interface which stores // indexes instead of plain value. The indexes reference the values in a // dictionary that the page was created for. type indexedPage struct { typ *indexedType values []int32 columnIndex int16 } func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32, data encoding.Values) *indexedPage { // RLE encoded values that contain dictionary indexes in data pages are // sometimes truncated when they contain only zeros. 
We account for this
	// special case here and extend the values buffer if it is shorter than
	// needed to hold `numValues`.
	size := int(numValues)
	values := data.Int32()

	if len(values) < size {
		if cap(values) < size {
			tmp := make([]int32, size)
			copy(tmp, values)
			values = tmp
		} else {
			// Grow in place: zero only the missing tail between the current
			// length and `size`. (The previous slice expression used
			// len(values)+size as the upper bound, which zeroes too many
			// entries and panics when it runs past the slice capacity.)
			clear := values[len(values):size]
			for i := range clear {
				clear[i] = 0
			}
		}
	}

	return &indexedPage{
		typ:         typ,
		values:      values[:size],
		columnIndex: ^columnIndex,
	}
}

func (page *indexedPage) Type() Type { return indexedPageType{page.typ} }

func (page *indexedPage) Column() int { return int(^page.columnIndex) }

func (page *indexedPage) Dictionary() Dictionary { return page.typ.dict }

func (page *indexedPage) NumRows() int64 { return int64(len(page.values)) }

func (page *indexedPage) NumValues() int64 { return int64(len(page.values)) }

func (page *indexedPage) NumNulls() int64 { return 0 }

func (page *indexedPage) Size() int64 { return 4 * int64(len(page.values)) }

func (page *indexedPage) RepetitionLevels() []byte { return nil }

func (page *indexedPage) DefinitionLevels() []byte { return nil }

func (page *indexedPage) Data() encoding.Values { return encoding.Int32Values(page.values) }

func (page *indexedPage) Values() ValueReader { return &indexedPageValues{page: page} }

// Bounds delegates to the dictionary since the page only stores indexes; the
// column index is patched onto the returned values.
func (page *indexedPage) Bounds() (min, max Value, ok bool) {
	if ok = len(page.values) > 0; ok {
		min, max = page.typ.dict.Bounds(page.values)
		min.columnIndex = page.columnIndex
		max.columnIndex = page.columnIndex
	}
	return min, max, ok
}

func (page *indexedPage) Slice(i, j int64) Page {
	return &indexedPage{
		typ:         page.typ,
		values:      page.values[i:j],
		columnIndex: page.columnIndex,
	}
}

// indexedPageType is an adapter for the indexedType returned when accessing
// the type of an indexedPage value. It overrides the Encode/Decode methods to
// account for the fact that an indexed page is holding indexes of values into
// its dictionary instead of plain values.
type indexedPageType struct{ *indexedType } func (t indexedPageType) NewValues(values []byte, _ []uint32) encoding.Values { return encoding.Int32ValuesFromBytes(values) } func (t indexedPageType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return encoding.EncodeInt32(dst, src, enc) } func (t indexedPageType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return encoding.DecodeInt32(dst, src, enc) } func (t indexedPageType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return Int32Type.EstimateDecodeSize(numValues, src, enc) } type indexedPageValues struct { page *indexedPage offset int } func (r *indexedPageValues) ReadValues(values []Value) (n int, err error) { if n = len(r.page.values) - r.offset; n == 0 { return 0, io.EOF } if n > len(values) { n = len(values) } r.page.typ.dict.Lookup(r.page.values[r.offset:r.offset+n], values[:n]) r.offset += n if r.offset == len(r.page.values) { err = io.EOF } return n, err } // indexedColumnBuffer is an implementation of the ColumnBuffer interface which // builds a page of indexes into a parent dictionary when values are written. 
type indexedColumnBuffer struct{ indexedPage } func newIndexedColumnBuffer(typ *indexedType, columnIndex int16, numValues int32) *indexedColumnBuffer { return &indexedColumnBuffer{ indexedPage: indexedPage{ typ: typ, values: make([]int32, 0, numValues), columnIndex: ^columnIndex, }, } } func (col *indexedColumnBuffer) Clone() ColumnBuffer { return &indexedColumnBuffer{ indexedPage: indexedPage{ typ: col.typ, values: append([]int32{}, col.values...), columnIndex: col.columnIndex, }, } } func (col *indexedColumnBuffer) Type() Type { return col.typ.Type } func (col *indexedColumnBuffer) ColumnIndex() ColumnIndex { return indexedColumnIndex{col} } func (col *indexedColumnBuffer) OffsetIndex() OffsetIndex { return indexedOffsetIndex{col} } func (col *indexedColumnBuffer) BloomFilter() BloomFilter { return nil } func (col *indexedColumnBuffer) Dictionary() Dictionary { return col.typ.dict } func (col *indexedColumnBuffer) Pages() Pages { return onePage(col.Page()) } func (col *indexedColumnBuffer) Page() Page { return &col.indexedPage } func (col *indexedColumnBuffer) Reset() { col.values = col.values[:0] } func (col *indexedColumnBuffer) Cap() int { return cap(col.values) } func (col *indexedColumnBuffer) Len() int { return len(col.values) } func (col *indexedColumnBuffer) Less(i, j int) bool { u := col.typ.dict.Index(col.values[i]) v := col.typ.dict.Index(col.values[j]) return col.typ.Compare(u, v) < 0 } func (col *indexedColumnBuffer) Swap(i, j int) { col.values[i], col.values[j] = col.values[j], col.values[i] } func (col *indexedColumnBuffer) WriteValues(values []Value) (int, error) { i := len(col.values) j := len(col.values) + len(values) if j <= cap(col.values) { col.values = col.values[:j] } else { tmp := make([]int32, j, 2*j) copy(tmp, col.values) col.values = tmp } col.typ.dict.Insert(col.values[i:], values) return len(values), nil } func (col *indexedColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) { i := len(col.values) j := len(col.values) + 
rows.Len() if j <= cap(col.values) { col.values = col.values[:j] } else { tmp := make([]int32, j, 2*j) copy(tmp, col.values) col.values = tmp } col.typ.dict.insert(col.values[i:], rows) } func (col *indexedColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) { i := int(offset) switch { case i < 0: return 0, errRowIndexOutOfBounds(offset, int64(len(col.values))) case i >= len(col.values): return 0, io.EOF default: for n < len(values) && i < len(col.values) { values[n] = col.typ.dict.Index(col.values[i]) values[n].columnIndex = col.columnIndex n++ i++ } if n < len(values) { err = io.EOF } return n, err } } func (col *indexedColumnBuffer) ReadRowAt(row Row, index int64) (Row, error) { switch { case index < 0: return row, errRowIndexOutOfBounds(index, int64(len(col.values))) case index >= int64(len(col.values)): return row, io.EOF default: v := col.typ.dict.Index(col.values[index]) v.columnIndex = col.columnIndex return append(row, v), nil } } type indexedColumnIndex struct{ col *indexedColumnBuffer } func (index indexedColumnIndex) NumPages() int { return 1 } func (index indexedColumnIndex) NullCount(int) int64 { return 0 } func (index indexedColumnIndex) NullPage(int) bool { return false } func (index indexedColumnIndex) MinValue(int) Value { min, _, _ := index.col.Bounds() return min } func (index indexedColumnIndex) MaxValue(int) Value { _, max, _ := index.col.Bounds() return max } func (index indexedColumnIndex) IsAscending() bool { min, max, _ := index.col.Bounds() return index.col.typ.Compare(min, max) <= 0 } func (index indexedColumnIndex) IsDescending() bool { min, max, _ := index.col.Bounds() return index.col.typ.Compare(min, max) > 0 } type indexedOffsetIndex struct{ col *indexedColumnBuffer } func (index indexedOffsetIndex) NumPages() int { return 1 } func (index indexedOffsetIndex) Offset(int) int64 { return 0 } func (index indexedOffsetIndex) CompressedPageSize(int) int64 { return index.col.Size() } func (index indexedOffsetIndex) 
FirstRowIndex(int) int64 { return 0 } ================================================ FILE: dictionary_amd64.go ================================================ //go:build !purego package parquet import ( "unsafe" "github.com/segmentio/parquet-go/internal/unsafecast" "github.com/segmentio/parquet-go/sparse" ) //go:noescape func dictionaryBoundsInt32(dict []int32, indexes []int32) (min, max int32, err errno) //go:noescape func dictionaryBoundsInt64(dict []int64, indexes []int32) (min, max int64, err errno) //go:noescape func dictionaryBoundsFloat32(dict []float32, indexes []int32) (min, max float32, err errno) //go:noescape func dictionaryBoundsFloat64(dict []float64, indexes []int32) (min, max float64, err errno) //go:noescape func dictionaryBoundsUint32(dict []uint32, indexes []int32) (min, max uint32, err errno) //go:noescape func dictionaryBoundsUint64(dict []uint64, indexes []int32) (min, max uint64, err errno) //go:noescape func dictionaryBoundsBE128(dict [][16]byte, indexes []int32) (min, max *[16]byte, err errno) //go:noescape func dictionaryLookup32(dict []uint32, indexes []int32, rows sparse.Array) errno //go:noescape func dictionaryLookup64(dict []uint64, indexes []int32, rows sparse.Array) errno //go:noescape func dictionaryLookupByteArrayString(dict []uint32, page []byte, indexes []int32, rows sparse.Array) errno //go:noescape func dictionaryLookupFixedLenByteArrayString(dict []byte, len int, indexes []int32, rows sparse.Array) errno //go:noescape func dictionaryLookupFixedLenByteArrayPointer(dict []byte, len int, indexes []int32, rows sparse.Array) errno func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) dict := unsafecast.Int32ToUint32(d.values) dictionaryLookup32(dict, indexes, rows).check() } func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) dict := unsafecast.Int64ToUint64(d.values) dictionaryLookup64(dict, indexes, rows).check() } 
// lookup writes the float32 dictionary entries selected by indexes into the
// sparse output array.
func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	// Only the bit patterns are moved; reinterpreting float32 as uint32
	// performs no numeric conversion.
	dict := unsafecast.Float32ToUint32(d.values)
	dictionaryLookup32(dict, indexes, rows).check()
}

// lookup writes the float64 dictionary entries selected by indexes into the
// sparse output array.
func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	dict := unsafecast.Float64ToUint64(d.values)
	dictionaryLookup64(dict, indexes, rows).check()
}

// lookupString writes, for each selected index, a string header aliasing the
// dictionary's backing storage into the sparse output array (no data is copied).
func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	// TODO: this optimization is disabled for now because it appears to race
	// with the garbage collector and result in writing pointers to free objects
	// to the output.
	//
	// This command was used to trigger the problem:
	//
	//	GOMAXPROCS=8 go test -run TestIssue368 -count 10
	//
	// https://github.com/segmentio/parquet-go/issues/368
	//
	//dictionaryLookupByteArrayString(d.offsets, d.values, indexes, rows).check()
	for i, j := range indexes {
		v := d.index(int(j))
		// Reinterpret the slice header as a string header: a string header is
		// a prefix (ptr+len) of a slice header, the cap field is dropped.
		*(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
	}
}

// lookupString is the fixed-length variant of byteArrayDictionary.lookupString;
// the fast assembly path is likewise disabled (see issue 368 above).
func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	//dictionaryLookupFixedLenByteArrayString(d.data, d.size, indexes, rows).check()
	for i, j := range indexes {
		v := d.index(j)
		*(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
	}
}

// lookup writes the uint32 dictionary entries selected by indexes into the
// sparse output array.
func (d *uint32Dictionary) lookup(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	dictionaryLookup32(d.values, indexes, rows).check()
}

// lookup writes the uint64 dictionary entries selected by indexes into the
// sparse output array.
func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	dictionaryLookup64(d.values, indexes, rows).check()
}

// lookupString writes 16-byte string headers pointing at the dictionary's
// [16]byte entries into the sparse output array.
func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	//dict := unsafecast.Uint128ToBytes(d.values)
	//dictionaryLookupFixedLenByteArrayString(dict, 16, indexes, rows).check()
	// s is a 16-byte template string; on each iteration its data pointer is
	// overwritten with the address of the dictionary entry, keeping len=16,
	// and the resulting header is copied to the output slot.
	s := "0123456789ABCDEF"
	for i, j := range indexes {
		*(**[16]byte)(unsafe.Pointer(&s)) = d.index(j)
		*(*string)(rows.Index(i)) = s
	}
}

// lookupPointer writes raw *[16]byte pointers to the dictionary entries into
// the sparse output array; callers assume the fixed 16-byte length.
func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	//dict := unsafecast.Uint128ToBytes(d.values)
	//dictionaryLookupFixedLenByteArrayPointer(dict, 16, indexes, rows).check()
	for i, j := range indexes {
		*(**[16]byte)(rows.Index(i)) = d.index(j)
	}
}

// The bounds methods delegate min/max computation over the selected entries to
// the assembly kernels, panicking (via errno.check) on out-of-range indexes.

func (d *int32Dictionary) bounds(indexes []int32) (min, max int32) {
	min, max, err := dictionaryBoundsInt32(d.values, indexes)
	err.check()
	return min, max
}

func (d *int64Dictionary) bounds(indexes []int32) (min, max int64) {
	min, max, err := dictionaryBoundsInt64(d.values, indexes)
	err.check()
	return min, max
}

func (d *floatDictionary) bounds(indexes []int32) (min, max float32) {
	min, max, err := dictionaryBoundsFloat32(d.values, indexes)
	err.check()
	return min, max
}

func (d *doubleDictionary) bounds(indexes []int32) (min, max float64) {
	min, max, err := dictionaryBoundsFloat64(d.values, indexes)
	err.check()
	return min, max
}

func (d *uint32Dictionary) bounds(indexes []int32) (min, max uint32) {
	min, max, err := dictionaryBoundsUint32(d.values, indexes)
	err.check()
	return min, max
}

func (d *uint64Dictionary) bounds(indexes []int32) (min, max uint64) {
	min, max, err := dictionaryBoundsUint64(d.values, indexes)
	err.check()
	return min, max
}

func (d *be128Dictionary) bounds(indexes []int32) (min, max *[16]byte) {
	min, max, err := dictionaryBoundsBE128(d.values, indexes)
	err.check()
	return min, max
}
================================================ FILE: dictionary_amd64.s ================================================
//go:build !purego

#include "textflag.h"

#define errnoIndexOutOfBounds 1

// func dictionaryBoundsInt32(dict []int32, indexes []int32) (min, max int32, err errno)
TEXT ·dictionaryBoundsInt32(SB), NOSPLIT, $0-64
	MOVQ dict_base+0(FP), AX
	MOVQ dict_len+8(FP), BX
	MOVQ indexes_base+24(FP), CX
	MOVQ indexes_len+32(FP), DX
	XORQ R10, R10 // min
	XORQ R11, R11 // max
	XORQ
R12, R12 // err XORQ SI, SI CMPQ DX, $0 JE return MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds MOVL (AX)(DI*4), R10 MOVL R10, R11 CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ $0xFFFF, R8 KMOVW R8, K1 VPBROADCASTD BX, Y2 // [len(dict)...] VPBROADCASTD R10, Y3 // [min...] VMOVDQU32 Y3, Y4 // [max...] loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y2, Y0, K2 KMOVW K2, R9 CMPB R9, $0xFF JNE indexOutOfBounds VPGATHERDD (AX)(Y0*4), K1, Y1 VPMINSD Y1, Y3, Y3 VPMAXSD Y1, Y4, Y4 KMOVW R8, K1 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VPERM2I128 $1, Y3, Y3, Y0 VPERM2I128 $1, Y4, Y4, Y1 VPMINSD Y0, Y3, Y3 VPMAXSD Y1, Y4, Y4 VPSHUFD $0b1110, Y3, Y0 VPSHUFD $0b1110, Y4, Y1 VPMINSD Y0, Y3, Y3 VPMAXSD Y1, Y4, Y4 VPSHUFD $1, Y3, Y0 VPSHUFD $1, Y4, Y1 VPMINSD Y0, Y3, Y3 VPMAXSD Y1, Y4, Y4 MOVQ X3, R10 MOVQ X4, R11 ANDQ $0xFFFFFFFF, R10 ANDQ $0xFFFFFFFF, R11 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVL (AX)(DI*4), DI CMPL DI, R10 CMOVLLT DI, R10 CMPL DI, R11 CMOVLGT DI, R11 INCQ SI test: CMPQ SI, DX JNE loop return: MOVL R10, min+48(FP) MOVL R11, max+52(FP) MOVQ R12, err+56(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, R12 JMP return // func dictionaryBoundsInt64(dict []int64, indexes []int32) (min, max int64, err errno) TEXT ·dictionaryBoundsInt64(SB), NOSPLIT, $0-72 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX XORQ R10, R10 // min XORQ R11, R11 // max XORQ R12, R12 // err XORQ SI, SI CMPQ DX, $0 JE return MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds MOVQ (AX)(DI*8), R10 MOVQ R10, R11 CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ $0xFFFF, R8 KMOVW R8, K1 VPBROADCASTD BX, Y2 // [len(dict)...] VPBROADCASTQ R10, Z3 // [min...] VMOVDQU64 Z3, Z4 // [max...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y2, Y0, K2 KMOVW K2, R9 CMPB R9, $0xFF JNE indexOutOfBounds VPGATHERDQ (AX)(Y0*8), K1, Z1 VPMINSQ Z1, Z3, Z3 VPMAXSQ Z1, Z4, Z4 KMOVW R8, K1 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VPERMQ $0b1110, Z3, Z0 VPERMQ $0b1110, Z4, Z1 VPMINSQ Z0, Z3, Z3 VPMAXSQ Z1, Z4, Z4 VPERMQ $1, Z3, Z0 VPERMQ $1, Z4, Z1 VPMINSQ Z0, Z3, Z3 VPMAXSQ Z1, Z4, Z4 VSHUFF64X2 $2, Z3, Z3, Z0 VSHUFF64X2 $2, Z4, Z4, Z1 VPMINSQ Z0, Z3, Z3 VPMAXSQ Z1, Z4, Z4 MOVQ X3, R10 MOVQ X4, R11 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVQ (AX)(DI*8), DI CMPQ DI, R10 CMOVQLT DI, R10 CMPQ DI, R11 CMOVQGT DI, R11 INCQ SI test: CMPQ SI, DX JNE loop return: MOVQ R10, min+48(FP) MOVQ R11, max+56(FP) MOVQ R12, err+64(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, R12 JMP return // func dictionaryBoundsFloat32(dict []float32, indexes []int32) (min, max float32, err errno) TEXT ·dictionaryBoundsFloat32(SB), NOSPLIT, $0-64 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX PXOR X3, X3 // min PXOR X4, X4 // max XORQ R12, R12 // err XORQ SI, SI CMPQ DX, $0 JE return MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds MOVSS (AX)(DI*4), X3 MOVAPS X3, X4 CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ $0xFFFF, R8 KMOVW R8, K1 VPBROADCASTD BX, Y2 // [len(dict)...] VPBROADCASTD X3, Y3 // [min...] VMOVDQU32 Y3, Y4 // [max...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y2, Y0, K2 KMOVW K2, R9 CMPB R9, $0xFF JNE indexOutOfBounds VPGATHERDD (AX)(Y0*4), K1, Y1 VMINPS Y1, Y3, Y3 VMAXPS Y1, Y4, Y4 KMOVW R8, K1 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VPERM2I128 $1, Y3, Y3, Y0 VPERM2I128 $1, Y4, Y4, Y1 VMINPS Y0, Y3, Y3 VMAXPS Y1, Y4, Y4 VPSHUFD $0b1110, Y3, Y0 VPSHUFD $0b1110, Y4, Y1 VMINPS Y0, Y3, Y3 VMAXPS Y1, Y4, Y4 VPSHUFD $1, Y3, Y0 VPSHUFD $1, Y4, Y1 VMINPS Y0, Y3, Y3 VMAXPS Y1, Y4, Y4 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVSS (AX)(DI*4), X1 UCOMISS X3, X1 JAE skipAssignMin MOVAPS X1, X3 skipAssignMin: UCOMISS X4, X1 JBE skipAssignMax MOVAPS X1, X4 skipAssignMax: INCQ SI test: CMPQ SI, DX JNE loop return: MOVSS X3, min+48(FP) MOVSS X4, max+52(FP) MOVQ R12, err+56(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, R12 JMP return // func dictionaryBoundsFloat64(dict []float64, indexes []int32) (min, max float64, err errno) TEXT ·dictionaryBoundsFloat64(SB), NOSPLIT, $0-72 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX PXOR X3, X3 // min PXOR X4, X4 // max XORQ R12, R12 // err XORQ SI, SI CMPQ DX, $0 JE return MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds MOVSD (AX)(DI*8), X3 MOVAPS X3, X4 CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ $0xFFFF, R8 KMOVW R8, K1 VPBROADCASTD BX, Y2 // [len(dict)...] VPBROADCASTQ X3, Z3 // [min...] VMOVDQU64 Z3, Z4 // [max...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y2, Y0, K2 KMOVW K2, R9 CMPB R9, $0xFF JNE indexOutOfBounds VPGATHERDQ (AX)(Y0*8), K1, Z1 VMINPD Z1, Z3, Z3 VMAXPD Z1, Z4, Z4 KMOVW R8, K1 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VPERMQ $0b1110, Z3, Z0 VPERMQ $0b1110, Z4, Z1 VMINPD Z0, Z3, Z3 VMAXPD Z1, Z4, Z4 VPERMQ $1, Z3, Z0 VPERMQ $1, Z4, Z1 VMINPD Z0, Z3, Z3 VMAXPD Z1, Z4, Z4 VSHUFF64X2 $2, Z3, Z3, Z0 VSHUFF64X2 $2, Z4, Z4, Z1 VMINPD Z0, Z3, Z3 VMAXPD Z1, Z4, Z4 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVSD (AX)(DI*8), X1 UCOMISD X3, X1 JAE skipAssignMin MOVAPD X1, X3 skipAssignMin: UCOMISD X4, X1 JBE skipAssignMax MOVAPD X1, X4 skipAssignMax: INCQ SI test: CMPQ SI, DX JNE loop return: MOVSD X3, min+48(FP) MOVSD X4, max+56(FP) MOVQ R12, err+64(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, R12 JMP return // func dictionaryBoundsUint32(dict []uint32, indexes []int32) (min, max uint32, err errno) TEXT ·dictionaryBoundsUint32(SB), NOSPLIT, $0-64 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX XORQ R10, R10 // min XORQ R11, R11 // max XORQ R12, R12 // err XORQ SI, SI CMPQ DX, $0 JE return MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds MOVL (AX)(DI*4), R10 MOVL R10, R11 CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ $0xFFFF, R8 KMOVW R8, K1 VPBROADCASTD BX, Y2 // [len(dict)...] VPBROADCASTD R10, Y3 // [min...] VMOVDQU32 Y3, Y4 // [max...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y2, Y0, K2 KMOVW K2, R9 CMPB R9, $0xFF JNE indexOutOfBounds VPGATHERDD (AX)(Y0*4), K1, Y1 VPMINUD Y1, Y3, Y3 VPMAXUD Y1, Y4, Y4 KMOVW R8, K1 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VPERM2I128 $1, Y3, Y3, Y0 VPERM2I128 $1, Y4, Y4, Y1 VPMINUD Y0, Y3, Y3 VPMAXUD Y1, Y4, Y4 VPSHUFD $0b1110, Y3, Y0 VPSHUFD $0b1110, Y4, Y1 VPMINUD Y0, Y3, Y3 VPMAXUD Y1, Y4, Y4 VPSHUFD $1, Y3, Y0 VPSHUFD $1, Y4, Y1 VPMINUD Y0, Y3, Y3 VPMAXUD Y1, Y4, Y4 MOVQ X3, R10 MOVQ X4, R11 ANDQ $0xFFFFFFFF, R10 ANDQ $0xFFFFFFFF, R11 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVL (AX)(DI*4), DI CMPL DI, R10 CMOVLCS DI, R10 CMPL DI, R11 CMOVLHI DI, R11 INCQ SI test: CMPQ SI, DX JNE loop return: MOVL R10, min+48(FP) MOVL R11, max+52(FP) MOVQ R12, err+56(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, R12 JMP return // func dictionaryBoundsUint64(dict []uint64, indexes []int32) (min, max uint64, err errno) TEXT ·dictionaryBoundsUint64(SB), NOSPLIT, $0-72 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX XORQ R10, R10 // min XORQ R11, R11 // max XORQ R12, R12 // err XORQ SI, SI CMPQ DX, $0 JE return MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVQ (AX)(DI*8), R10 MOVQ R10, R11 CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ $0xFFFF, R8 KMOVW R8, K1 VPBROADCASTD BX, Y2 // [len(dict)...] VPBROADCASTQ R10, Z3 // [min...] VMOVDQU64 Z3, Z4 // [max...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y2, Y0, K2 KMOVW K2, R9 CMPB R9, $0xFF JNE indexOutOfBounds VPGATHERDQ (AX)(Y0*8), K1, Z1 VPMINUQ Z1, Z3, Z3 VPMAXUQ Z1, Z4, Z4 KMOVW R8, K1 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VPERMQ $0b1110, Z3, Z0 VPERMQ $0b1110, Z4, Z1 VPMINUQ Z0, Z3, Z3 VPMAXUQ Z1, Z4, Z4 VPERMQ $1, Z3, Z0 VPERMQ $1, Z4, Z1 VPMINUQ Z0, Z3, Z3 VPMAXUQ Z1, Z4, Z4 VSHUFF64X2 $2, Z3, Z3, Z0 VSHUFF64X2 $2, Z4, Z4, Z1 VPMINUQ Z0, Z3, Z3 VPMAXUQ Z1, Z4, Z4 MOVQ X3, R10 MOVQ X4, R11 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVQ (AX)(DI*8), DI CMPQ DI, R10 CMOVQCS DI, R10 CMPQ DI, R11 CMOVQHI DI, R11 INCQ SI test: CMPQ SI, DX JNE loop return: MOVQ R10, min+48(FP) MOVQ R11, max+56(FP) MOVQ R12, err+64(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, R12 JMP return // func dictionaryBoundsBE128(dict [][16]byte, indexes []int32) (min, max *[16]byte, err errno) TEXT ·dictionaryBoundsBE128(SB), NOSPLIT, $0-72 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX SHLQ $2, DX // x 4 ADDQ CX, DX // end XORQ R8, R8 // min (pointer) XORQ R9, R9 // max (pointer) XORQ SI, SI // err XORQ DI, DI CMPQ DX, $0 JE return MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds SHLQ $4, DI // the dictionary contains 16 byte words LEAQ (AX)(DI*1), R8 MOVQ R8, R9 MOVQ 0(AX)(DI*1), R10 // min (high) MOVQ 8(AX)(DI*1), R11 // min (low) BSWAPQ R10 BSWAPQ R11 MOVQ R10, R12 // max (high) MOVQ R11, R13 // max (low) JMP next loop: MOVL (CX), DI CMPL DI, BX JAE indexOutOfBounds SHLQ $4, DI MOVQ 0(AX)(DI*1), R14 MOVQ 8(AX)(DI*1), R15 BSWAPQ R14 BSWAPQ R15 testLessThan: CMPQ R14, R10 JA testGreaterThan JB lessThan CMPQ R15, R11 JAE testGreaterThan lessThan: LEAQ (AX)(DI*1), R8 MOVQ R14, R10 MOVQ R15, R11 JMP next testGreaterThan: CMPQ R14, R12 JB next JA greaterThan CMPQ R15, R13 JBE next greaterThan: LEAQ (AX)(DI*1), R9 MOVQ R14, R12 MOVQ R15, R13 next: ADDQ $4, CX CMPQ CX, DX JNE loop return: 
MOVQ R8, min+48(FP) MOVQ R9, max+56(FP) MOVQ SI, err+64(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, SI JMP return // The lookup functions provide optimized versions of the dictionary index // lookup logic. // // When AVX512 is available, the AVX512 versions of the functions are used // which use the VPGATHER* instructions to perform 8 parallel lookups of the // values in the dictionary, then VPSCATTER* to do 8 parallel writes to the // sparse output buffer. // func dictionaryLookup32(dict []uint32, indexes []int32, rows sparse.Array) errno TEXT ·dictionaryLookup32(SB), NOSPLIT, $0-80 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX MOVQ rows_array_ptr+48(FP), R8 MOVQ rows_array_off+64(FP), R9 XORQ SI, SI CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ R9, R10 SHLQ $3, R10 // 8 * size MOVW $0xFFFF, R11 KMOVW R11, K1 KMOVW R11, K2 VPBROADCASTD R9, Y2 // [size...] VPMULLD ·range0n8(SB), Y2, Y2 // [0*size,1*size,...] VPBROADCASTD BX, Y3 // [len(dict)...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y3, Y0, K3 KMOVW K3, R11 CMPB R11, $0xFF JNE indexOutOfBounds VPGATHERDD (AX)(Y0*4), K1, Y1 VPSCATTERDD Y1, K2, (R8)(Y2*1) KMOVW R11, K1 KMOVW R11, K2 ADDQ R10, R8 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVL (AX)(DI*4), DI MOVL DI, (R8) ADDQ R9, R8 INCQ SI test: CMPQ SI, DX JNE loop XORQ AX, AX return: MOVQ AX, ret+72(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, AX JMP return // func dictionaryLookup64(dict []uint64, indexes []int32, rows sparse.Array) errno TEXT ·dictionaryLookup64(SB), NOSPLIT, $0-80 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ indexes_base+24(FP), CX MOVQ indexes_len+32(FP), DX MOVQ rows_array_ptr+48(FP), R8 MOVQ rows_array_off+64(FP), R9 XORQ SI, SI CMPQ DX, $8 JB test CMPB ·hasAVX512VL(SB), $0 JE test MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI MOVQ R9, R10 SHLQ $3, R10 // 8 * size MOVW $0xFFFF, R11 KMOVW R11, K1 KMOVW R11, K2 VPBROADCASTD R9, Y2 // [size...] VPMULLD ·range0n8(SB), Y2, Y2 // [0*size,1*size,...] VPBROADCASTD BX, Y3 // [len(dict)...] 
loopAVX512: VMOVDQU32 (CX)(SI*4), Y0 VPCMPUD $1, Y3, Y0, K3 KMOVW K3, R11 CMPB R11, $0xFF JNE indexOutOfBounds VPGATHERDQ (AX)(Y0*8), K1, Z1 VPSCATTERDQ Z1, K2, (R8)(Y2*1) KMOVW R11, K1 KMOVW R11, K2 ADDQ R10, R8 ADDQ $8, SI CMPQ SI, DI JNE loopAVX512 VZEROUPPER JMP test loop: MOVL (CX)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds MOVQ (AX)(DI*8), DI MOVQ DI, (R8) ADDQ R9, R8 INCQ SI test: CMPQ SI, DX JNE loop XORQ AX, AX return: MOVQ AX, ret+72(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, AX JMP return // func dictionaryLookupByteArrayString(dict []uint32, page []byte, indexes []int32, rows sparse.Array) errno TEXT ·dictionaryLookupByteArrayString(SB), NOSPLIT, $0-104 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX DECQ BX // the offsets have the total length as last element MOVQ page_base+24(FP), CX MOVQ indexes_base+48(FP), R8 MOVQ indexes_len+56(FP), R9 MOVQ rows_array_ptr+72(FP), R10 MOVQ rows_array_off+88(FP), R11 XORQ DI, DI XORQ SI, SI loop: // Load the index that we want to read the value from. This may come from // user input so we must validate that the indexes are within the bounds of // the dictionary. MOVL (R8)(SI*4), DI CMPL DI, BX JAE indexOutOfBounds // Load the offsets within the dictionary page where the value is stored. // We trust the offsets to be correct since they are generated internally by // the dictionary code, there is no need to check that they are within the // bounds of the dictionary page. MOVL 0(AX)(DI*4), DX MOVL 4(AX)(DI*4), DI // Compute the length of the value (the difference between two consecutive // offsets), and the pointer to the first byte of the string value. SUBL DX, DI LEAQ (CX)(DX*1), DX // Store the length and pointer to the value into the output location. // The memory layout is expected to hold a pointer and length, which are // both 64 bits words. This is the layout used by parquet.Value and the Go // string value type. 
MOVQ DX, (R10) MOVQ DI, 8(R10) ADDQ R11, R10 INCQ SI test: CMPQ SI, R9 JNE loop XORQ AX, AX return: MOVQ AX, ret+96(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, AX JMP return // func dictionaryLookupFixedLenByteArrayString(dict []byte, len int, indexes []int32, rows sparse.Array) errno TEXT ·dictionaryLookupFixedLenByteArrayString(SB), NOSPLIT, $0-88 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ len+24(FP), CX MOVQ indexes_base+32(FP), DX MOVQ indexes_len+40(FP), R8 MOVQ rows_array_ptr+56(FP), R9 MOVQ rows_array_off+72(FP), R10 XORQ DI, DI XORQ SI, SI loop: MOVL (DX)(SI*4), DI IMULQ CX, DI CMPL DI, BX JAE indexOutOfBounds ADDQ AX, DI MOVQ DI, (R9) MOVQ CX, 8(R9) ADDQ R10, R9 INCQ SI test: CMPQ SI, R8 JNE loop XORQ AX, AX return: MOVQ AX, ret+80(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, AX JMP return // This is the same algorithm as dictionaryLookupFixedLenByteArrayString but we // only store the pointer to the location holding the value instead of storing // the pair of pointer and length. Since the length is fixed for this dictionary // type, the application can assume it at the call site. 
// // func dictionaryLookupFixedLenByteArrayPointer(dict []byte, len int, indexes []int32, rows sparse.Array) errno TEXT ·dictionaryLookupFixedLenByteArrayPointer(SB), NOSPLIT, $0-88 MOVQ dict_base+0(FP), AX MOVQ dict_len+8(FP), BX MOVQ len+24(FP), CX MOVQ indexes_base+32(FP), DX MOVQ indexes_len+40(FP), R8 MOVQ rows_array_ptr+56(FP), R9 MOVQ rows_array_off+72(FP), R10 XORQ DI, DI XORQ SI, SI loop: MOVL (DX)(SI*4), DI IMULQ CX, DI CMPL DI, BX JAE indexOutOfBounds ADDQ AX, DI MOVQ DI, (R9) ADDQ R10, R9 INCQ SI test: CMPQ SI, R8 JNE loop XORQ AX, AX return: MOVQ AX, ret+80(FP) RET indexOutOfBounds: MOVQ $errnoIndexOutOfBounds, AX JMP return GLOBL ·range0n8(SB), RODATA|NOPTR, $40 DATA ·range0n8+0(SB)/4, $0 DATA ·range0n8+4(SB)/4, $1 DATA ·range0n8+8(SB)/4, $2 DATA ·range0n8+12(SB)/4, $3 DATA ·range0n8+16(SB)/4, $4 DATA ·range0n8+20(SB)/4, $5 DATA ·range0n8+24(SB)/4, $6 DATA ·range0n8+28(SB)/4, $7 DATA ·range0n8+32(SB)/4, $8 ================================================ FILE: dictionary_purego.go ================================================ //go:build purego || !amd64 package parquet import ( "unsafe" "github.com/segmentio/parquet-go/sparse" ) func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { *(*int32)(rows.Index(i)) = d.index(j) } } func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { *(*int64)(rows.Index(i)) = d.index(j) } } func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { *(*float32)(rows.Index(i)) = d.index(j) } } func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, rows) for i, j := range indexes { *(*float64)(rows.Index(i)) = d.index(j) } } func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) { checkLookupIndexBounds(indexes, 
rows)
	for i, j := range indexes {
		v := d.index(int(j))
		// Reinterpret the slice header as a string header (ptr+len prefix of
		// a slice header; the cap field is dropped). No data is copied.
		*(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
	}
}

// lookupString is the fixed-length variant; it aliases the dictionary storage
// the same way as the byte array dictionary above.
func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	for i, j := range indexes {
		v := d.index(j)
		*(*string)(rows.Index(i)) = *(*string)(unsafe.Pointer(&v))
	}
}

// lookup writes the uint32 dictionary entries selected by indexes into the
// sparse output array.
func (d *uint32Dictionary) lookup(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	for i, j := range indexes {
		*(*uint32)(rows.Index(i)) = d.index(j)
	}
}

// lookup writes the uint64 dictionary entries selected by indexes into the
// sparse output array.
func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	for i, j := range indexes {
		*(*uint64)(rows.Index(i)) = d.index(j)
	}
}

// lookupString writes 16-byte string headers pointing at the dictionary's
// [16]byte entries into the sparse output array. s is a 16-byte template
// string whose data pointer is patched to each entry before the header is
// copied out.
func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	s := "0123456789ABCDEF"
	for i, j := range indexes {
		*(**[16]byte)(unsafe.Pointer(&s)) = d.index(j)
		*(*string)(rows.Index(i)) = s
	}
}

// lookupPointer writes raw *[16]byte pointers to the dictionary entries into
// the sparse output array.
func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Array) {
	checkLookupIndexBounds(indexes, rows)
	for i, j := range indexes {
		*(**[16]byte)(rows.Index(i)) = d.index(j)
	}
}

// The bounds methods compute the min and max dictionary entries selected by
// indexes with a simple linear scan.
//
// NOTE(review): indexes[0] panics when indexes is empty; presumably callers
// guarantee a non-empty selection — confirm against call sites.

func (d *int32Dictionary) bounds(indexes []int32) (min, max int32) {
	min = d.index(indexes[0])
	max = min
	for _, i := range indexes[1:] {
		value := d.index(i)
		if value < min {
			min = value
		}
		if value > max {
			max = value
		}
	}
	return min, max
}

func (d *int64Dictionary) bounds(indexes []int32) (min, max int64) {
	min = d.index(indexes[0])
	max = min
	for _, i := range indexes[1:] {
		value := d.index(i)
		if value < min {
			min = value
		}
		if value > max {
			max = value
		}
	}
	return min, max
}

func (d *floatDictionary) bounds(indexes []int32) (min, max float32) {
	min = d.index(indexes[0])
	max = min
	for _, i := range indexes[1:] {
		value := d.index(i)
		if value < min {
			min = value
		}
		if value > max {
			max = value
		}
	}
	return min, max
}

func (d *doubleDictionary) bounds(indexes []int32) (min, max float64) {
	min = d.index(indexes[0])
	max = min
	for _, i := range indexes[1:] {
		value := d.index(i)
		if value < min {
			min = value
		}
		if value > max {
			max = value
		}
	}
	return min, max
}

func (d *uint32Dictionary) bounds(indexes []int32) (min, max uint32) {
	min = d.index(indexes[0])
	max = min
	for _, i := range indexes[1:] {
		value := d.index(i)
		if value < min {
			min = value
		}
		if value > max {
			max = value
		}
	}
	return min, max
}

func (d *uint64Dictionary) bounds(indexes []int32) (min, max uint64) {
	min = d.index(indexes[0])
	max = min
	for _, i := range indexes[1:] {
		value := d.index(i)
		if value < min {
			min = value
		}
		if value > max {
			max = value
		}
	}
	return min, max
}

// bounds for the 16-byte big-endian dictionary batches pointer lookups through
// a fixed 64-entry scratch buffer, then compares entries with lessBE128.
func (d *be128Dictionary) bounds(indexes []int32) (min, max *[16]byte) {
	values := [64]*[16]byte{}
	min = d.index(indexes[0])
	max = min
	for i := 1; i < len(indexes); i += len(values) {
		n := len(indexes) - i
		if n > len(values) {
			n = len(values)
		}
		j := i + n
		// Full-slice expressions (cap == len) prevent accidental growth past
		// the batch boundary inside the lookup.
		d.lookupPointer(indexes[i:j:j], makeArrayBE128(values[:n:n]))
		for _, value := range values[:n:n] {
			switch {
			case lessBE128(value, min):
				min = value
			case lessBE128(max, value):
				max = value
			}
		}
	}
	return min, max
}
================================================ FILE: dictionary_test.go ================================================
package parquet_test

import (
	"bytes"
	"fmt"
	"math/rand"
	"testing"
	"time"

	"github.com/segmentio/parquet-go"
)

// dictionaryTypes enumerates every parquet type exercised by the dictionary
// tests and benchmarks below.
var dictionaryTypes = [...]parquet.Type{
	parquet.BooleanType,
	parquet.Int32Type,
	parquet.Int64Type,
	parquet.Int96Type,
	parquet.FloatType,
	parquet.DoubleType,
	parquet.ByteArrayType,
	parquet.FixedLenByteArrayType(10),
	parquet.FixedLenByteArrayType(16),
	parquet.Uint(32).Type(),
	parquet.Uint(64).Type(),
}

// TestDictionary runs the dictionary round-trip test for every type and a
// range of dictionary sizes.
func TestDictionary(t *testing.T) {
	for _, typ := range dictionaryTypes {
		for _, numValues := range []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 1e2, 1e3, 1e4} {
			t.Run(fmt.Sprintf("%s/N=%d", typ, numValues), func(t *testing.T) {
				testDictionary(t, typ, numValues)
			})
		}
	}
}

// testDictionary inserts randomly generated values in growing batches and
// verifies insert idempotency, lookups, and bounds after each batch.
func testDictionary(t *testing.T, typ parquet.Type, numValues int) {
	const columnIndex = 1

	dict := typ.NewDictionary(columnIndex,
0, typ.NewValues(nil, nil))
	values := make([]parquet.Value, numValues)
	indexes := make([]int32, numValues)
	lookups := make([]parquet.Value, numValues)

	f := randValueFuncOf(typ)
	r := rand.New(rand.NewSource(int64(numValues)))

	for i := range values {
		values[i] = f(r)
		values[i] = values[i].Level(0, 0, columnIndex)
	}

	mapping := make(map[int32]parquet.Value, numValues)

	// Insert values in batches of roughly half the remaining input, verifying
	// the dictionary after each batch.
	for i := 0; i < numValues; {
		j := i + ((numValues-i)/2 + 1)
		if j > numValues {
			j = numValues
		}
		dict.Insert(indexes[i:j], values[i:j])
		for k, v := range values[i:j] {
			mapping[indexes[i+k]] = v
		}
		for _, index := range indexes[i:j] {
			if index < 0 || index >= int32(dict.Len()) {
				t.Fatalf("index out of bounds: %d", index)
			}
		}
		// second insert is a no-op since all the values are already in the dictionary
		lastDictLen := dict.Len()
		dict.Insert(indexes[i:j], values[i:j])
		if dict.Len() != lastDictLen {
			for k, index := range indexes[i:j] {
				if index >= int32(len(mapping)) {
					t.Log(values[i+k])
				}
			}
			t.Fatalf("%d values were inserted on the second pass", dict.Len()-len(mapping))
		}
		// Shuffle the batch so lookups are exercised in random index order.
		r.Shuffle(j-i, func(a, b int) {
			indexes[a+i], indexes[b+i] = indexes[b+i], indexes[a+i]
		})
		dict.Lookup(indexes[i:j], lookups[i:j])
		for lookupIndex, valueIndex := range indexes[i:j] {
			want := mapping[valueIndex]
			got := lookups[lookupIndex+i]
			if !parquet.DeepEqual(want, got) {
				t.Fatalf("wrong value looked up at index %d: want=%#v got=%#v", valueIndex, want, got)
			}
		}
		// Recompute the expected bounds of the batch with the type's comparator
		// and check them against the dictionary's Bounds implementation.
		minValue := values[i]
		maxValue := values[i]
		for _, value := range values[i+1 : j] {
			switch {
			case typ.Compare(value, minValue) < 0:
				minValue = value
			case typ.Compare(value, maxValue) > 0:
				maxValue = value
			}
		}
		lowerBound, upperBound := dict.Bounds(indexes[i:j])
		if !parquet.DeepEqual(lowerBound, minValue) {
			t.Errorf("wrong lower bound between indexes %d and %d: want=%#v got=%#v", i, j, minValue, lowerBound)
		}
		if !parquet.DeepEqual(upperBound, maxValue) {
			t.Errorf("wrong upper bound between indexes %d and %d: want=%#v got=%#v", i, j, maxValue, upperBound)
		}
		i = j
	}

	// Final pass: a full lookup over all indexes must reproduce every value.
	for i := range lookups {
		lookups[i] = parquet.Value{}
	}
	dict.Lookup(indexes, lookups)
	for lookupIndex, valueIndex := range indexes {
		want := mapping[valueIndex]
		got := lookups[lookupIndex]
		if !parquet.Equal(want, got) {
			t.Fatalf("wrong value looked up at index %d: want=%+v got=%+v", valueIndex, want, got)
		}
	}
}

// BenchmarkDictionary measures Bounds, Insert, and Lookup throughput across
// all dictionary types and sizes, reporting values processed per second.
func BenchmarkDictionary(b *testing.B) {
	tests := []struct {
		scenario string
		init     func(parquet.Dictionary, []int32, []parquet.Value)
		test     func(parquet.Dictionary, []int32, []parquet.Value)
	}{
		{
			scenario: "Bounds",
			init:     parquet.Dictionary.Insert,
			test: func(dict parquet.Dictionary, indexes []int32, _ []parquet.Value) {
				dict.Bounds(indexes)
			},
		},
		{
			scenario: "Insert",
			test:     parquet.Dictionary.Insert,
		},
		{
			scenario: "Lookup",
			init:     parquet.Dictionary.Insert,
			test:     parquet.Dictionary.Lookup,
		},
	}

	for i, test := range tests {
		b.Run(test.scenario, func(b *testing.B) {
			for j, typ := range dictionaryTypes {
				for _, numValues := range []int{1e2, 1e3, 1e4, 1e5, 1e6} {
					buf := typ.NewValues(make([]byte, 0, 4*numValues), nil)
					dict := typ.NewDictionary(0, 0, buf)
					values := make([]parquet.Value, numValues)

					f := randValueFuncOf(typ)
					r := rand.New(rand.NewSource(int64(i * j * numValues)))

					for i := range values {
						values[i] = f(r)
					}

					indexes := make([]int32, len(values))
					if test.init != nil {
						test.init(dict, indexes, values)
					}

					b.Run(fmt.Sprintf("%s/N=%d", typ, numValues), func(b *testing.B) {
						start := time.Now()
						for i := 0; i < b.N; i++ {
							test.test(dict, indexes, values)
						}
						seconds := time.Since(start).Seconds()
						b.ReportMetric(float64(numValues*b.N)/seconds, "value/s")
					})
				}
			}
		})
	}
}

// TestIssue312 verifies that readers and buffers expose the same column chunk
// type for dictionary-encoded columns.
func TestIssue312(t *testing.T) {
	node := parquet.String()
	node = parquet.Encoded(node, &parquet.RLEDictionary)
	g := parquet.Group{}
	g["mystring"] = node
	schema := parquet.NewSchema("test", g)

	rows := []parquet.Row{[]parquet.Value{parquet.ValueOf("hello").Level(0, 0, 0)}}

	var storage bytes.Buffer

	tests := []struct {
		name        string
		getRowGroup func(t *testing.T) parquet.RowGroup
	}{
		{
			name: "Writer",
			getRowGroup: func(t *testing.T) parquet.RowGroup {
				t.Helper()
				w := parquet.NewWriter(&storage, schema)
				_, err := w.WriteRows(rows)
				if err != nil {
					t.Fatal(err)
				}
				if err := w.Close(); err != nil {
					t.Fatal(err)
				}
				r := bytes.NewReader(storage.Bytes())
				f, err := parquet.OpenFile(r, int64(storage.Len()))
				if err != nil {
					t.Fatal(err)
				}
				return f.RowGroups()[0]
			},
		},
		{
			name: "Buffer",
			getRowGroup: func(t *testing.T) parquet.RowGroup {
				t.Helper()
				b := parquet.NewBuffer(schema)
				_, err := b.WriteRows(rows)
				if err != nil {
					t.Fatal(err)
				}
				return b
			},
		},
	}

	for _, testCase := range tests {
		t.Run(testCase.name, func(t *testing.T) {
			rowGroup := testCase.getRowGroup(t)
			chunk := rowGroup.ColumnChunks()[0]
			idx := chunk.ColumnIndex()
			val := idx.MinValue(0)
			columnType := chunk.Type()
			values := columnType.NewValues(val.Bytes(), []uint32{0, uint32(len(val.Bytes()))})

			// This test ensures that the dictionary type created by column
			// chunks of parquet readers and buffers are the same. We want the
			// column chunk type to be the actual value type, even when the
			// schema uses a dictionary encoding.
// // https://github.com/segmentio/parquet-go/issues/312 _ = columnType.NewDictionary(0, 1, values) }) } } ================================================ FILE: encoding/bitpacked/bitpacked.go ================================================ package bitpacked import ( "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" ) type Encoding struct { encoding.NotSupported BitWidth int } func (e *Encoding) String() string { return "BIT_PACKED" } func (e *Encoding) Encoding() format.Encoding { return format.BitPacked } func (e *Encoding) EncodeLevels(dst []byte, src []uint8) ([]byte, error) { dst, err := encodeLevels(dst[:0], src, uint(e.BitWidth)) return dst, e.wrap(err) } func (e *Encoding) DecodeLevels(dst []uint8, src []byte) ([]uint8, error) { dst, err := decodeLevels(dst[:0], src, uint(e.BitWidth)) return dst, e.wrap(err) } func (e *Encoding) wrap(err error) error { if err != nil { err = encoding.Error(e, err) } return err } func encodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) { if bitWidth == 0 || len(src) == 0 { return append(dst[:0], 0), nil } n := ((int(bitWidth) * len(src)) + 7) / 8 c := n + 1 if cap(dst) < c { dst = make([]byte, c, 2*c) } else { dst = dst[:c] for i := range dst { dst[i] = 0 } } bitMask := byte(1<> bitShift i := bitOffset / 8 j := bitOffset % 8 dst[i+0] |= (v & bitMask) << j dst[i+1] |= (v >> (8 - j)) bitOffset += bitWidth } return dst[:n], nil } func decodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) { if bitWidth == 0 || len(src) == 0 { return append(dst[:0], 0), nil } numBits := 8 * uint(len(src)) numValues := int(numBits / bitWidth) if (numBits % bitWidth) != 0 { numValues++ } if cap(dst) < numValues { dst = make([]byte, numValues, 2*numValues) } else { dst = dst[:numValues] for i := range dst { dst[i] = 0 } } bitMask := byte(1<> j) if int(i+1) < len(src) { v |= (src[i+1] << (8 - j)) } v &= bitMask dst[k] = bitFlip(v) >> bitShift bitOffset += bitWidth } return dst, nil } func bitFlip(b 
byte) byte { return (((b >> 0) & 1) << 7) | (((b >> 1) & 1) << 6) | (((b >> 2) & 1) << 5) | (((b >> 3) & 1) << 4) | (((b >> 4) & 1) << 3) | (((b >> 5) & 1) << 2) | (((b >> 6) & 1) << 1) | (((b >> 7) & 1) << 0) } ================================================ FILE: encoding/bitpacked/bitpacked_test.go ================================================ //go:build go1.18 // +build go1.18 package bitpacked_test import ( "testing" "github.com/segmentio/parquet-go/encoding/fuzz" "github.com/segmentio/parquet-go/encoding/rle" ) func FuzzEncodeLevels(f *testing.F) { fuzz.EncodeLevels(f, &rle.Encoding{BitWidth: 8}) } ================================================ FILE: encoding/bytestreamsplit/bytestreamsplit.go ================================================ package bytestreamsplit import ( "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" "github.com/segmentio/parquet-go/internal/unsafecast" ) // This encoder implements a version of the Byte Stream Split encoding as described // in https://github.com/apache/parquet-format/blob/master/Encodings.md#byte-stream-split-byte_stream_split--9 type Encoding struct { encoding.NotSupported } func (e *Encoding) String() string { return "BYTE_STREAM_SPLIT" } func (e *Encoding) Encoding() format.Encoding { return format.ByteStreamSplit } func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { dst = resize(dst, 4*len(src)) encodeFloat(dst, unsafecast.Float32ToBytes(src)) return dst, nil } func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { dst = resize(dst, 8*len(src)) encodeDouble(dst, unsafecast.Float64ToBytes(src)) return dst, nil } func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) { if (len(src) % 4) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src)) } buf := resize(unsafecast.Float32ToBytes(dst), len(src)) decodeFloat(buf, src) return unsafecast.BytesToFloat32(buf), nil } func (e *Encoding) 
DecodeDouble(dst []float64, src []byte) ([]float64, error) { if (len(src) % 8) != 0 { return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src)) } buf := resize(unsafecast.Float64ToBytes(dst), len(src)) decodeDouble(buf, src) return unsafecast.BytesToFloat64(buf), nil } func resize(buf []byte, size int) []byte { if cap(buf) < size { buf = make([]byte, size, 2*size) } else { buf = buf[:size] } return buf } ================================================ FILE: encoding/bytestreamsplit/bytestreamsplit_amd64.go ================================================ //go:build !purego package bytestreamsplit import ( "golang.org/x/sys/cpu" ) var encodeFloatHasAVX512 = cpu.X86.HasAVX512 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512VL var encodeDoubleHasAVX512 = cpu.X86.HasAVX512 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512VL && cpu.X86.HasAVX512VBMI // VPERMB var decodeFloatHasAVX2 = cpu.X86.HasAVX2 var decodeDoubleHasAVX512 = cpu.X86.HasAVX512 && cpu.X86.HasAVX512F && cpu.X86.HasAVX512VL && cpu.X86.HasAVX512VBMI // VPERMB //go:noescape func encodeFloat(dst, src []byte) //go:noescape func encodeDouble(dst, src []byte) //go:noescape func decodeFloat(dst, src []byte) //go:noescape func decodeDouble(dst, src []byte) ================================================ FILE: encoding/bytestreamsplit/bytestreamsplit_amd64.s ================================================ //go:build !purego #include "textflag.h" // This file contains optimizations of the BYTE_STREAM_SPLIT encoding using AVX2 // and AVX512 (when available). // // The AVX2/512 instruction set comes with instructions to load memory from, or // store memory at sparse locations called VPGATHER and VPSCATTER. VPGATHER was // available in the AVX2 instruction set, VPSCATTER was introduced in AVX512 // (when the AVX512_VBMI extension is supported). 
Gathering bytes at sparse
// memory locations is useful during the decoding process since we are
// recomposing 32 or 64 bit floating point values from 4 or 8 bytes dispatched
// in the input byte array.
//
// To either deconstruct or reconstruct floating point values, we need to
// reorder the bytes of each value. If we have 4 32 bit floats, we can permute
// their bytes so that the first one contains all the first bytes, the second
// contains all the second bytes, etc... The VPSHUFB instruction is used to
// perform the byte permutation, or the VPERMB instruction for 64 bit floats.
//
// We use different instructions because the VPSHUFB instruction works on two
// lanes of 16 bytes when used on YMM registers. 4 32 bit floats take 16 bytes,
// so a YMM register can hold two lanes of 4 32 bit floats and the VPSHUFB
// can permute the two sets of values in a single invocation. For 64 bit floats
// we need to permute 8 values, which take 64 bytes and therefore need to be
// held in a ZMM register and apply permutations across the entire register,
// which is only possible using VPERMB.
//
// Technically we could use ZMM registers when working on 32 bit floats to work
// on 16 values per iteration. However, measurements indicated that the latency
// of VPGATHERDD/VPSCATTERDD on ZMM registers did not provide any improvements
// to the throughput of the algorithms, but working on more values increased the
// code complexity. Using YMM registers offered the best balance between
// performance and maintainability.
// // At a high level the vectorized algorithms are the following: // // encoding // -------- // * Load a vector of data from the input buffer // * Permute bytes, grouping bytes by index // * Scatter bytes of the register to the output buffer // // decoding // -------- // * Gather sparse bytes from the input buffer // * Permute bytes, reconstructing the original values // * Store the vector in the output buffer // // When AVX instructions are not available, the functions fallback to scalar // implementations of the algorithms. These yield much lower throughput, but // performed 20-30% better than the code generated by the Go compiler. // func encodeFloat(dst, src []byte) TEXT ·encodeFloat(SB), NOSPLIT, $0-48 MOVQ src_base+24(FP), AX MOVQ src_len+32(FP), BX MOVQ dst_base+0(FP), DX MOVQ AX, CX ADDQ BX, CX // end SHRQ $2, BX // len CMPQ BX, $0 JE done CMPB ·encodeFloatHasAVX512(SB), $0 JE loop1x4 CMPQ BX, $8 JB loop1x4 MOVQ CX, DI SUBQ AX, DI SHRQ $5, DI SHLQ $5, DI ADDQ AX, DI VMOVDQU32 shuffle8x4<>(SB), Y0 VPBROADCASTD BX, Y2 VPMULLD scale8x4<>(SB), Y2, Y2 VPADDD offset8x4<>(SB), Y2, Y2 loop8x4: KXORQ K1, K1, K1 KNOTQ K1, K1 VMOVDQU32 (AX), Y1 VPSHUFB Y0, Y1, Y1 VPSCATTERDD Y1, K1, (DX)(Y2*1) ADDQ $32, AX ADDQ $8, DX CMPQ AX, DI JNE loop8x4 VZEROUPPER CMPQ AX, CX JE done loop1x4: MOVL (AX), SI MOVQ DX, DI MOVB SI, (DI) SHRL $8, SI ADDQ BX, DI MOVB SI, (DI) SHRL $8, SI ADDQ BX, DI MOVB SI, (DI) SHRL $8, SI ADDQ BX, DI MOVB SI, (DI) ADDQ $4, AX INCQ DX CMPQ AX, CX JB loop1x4 done: RET // func encodeDouble(dst, src []byte) TEXT ·encodeDouble(SB), NOSPLIT, $0-48 MOVQ src_base+24(FP), AX MOVQ src_len+32(FP), BX MOVQ dst_base+0(FP), DX MOVQ AX, CX ADDQ BX, CX SHRQ $3, BX CMPQ BX, $0 JE done CMPB ·encodeDoubleHasAVX512(SB), $0 JE loop1x8 CMPQ BX, $8 JB loop1x8 MOVQ CX, DI SUBQ AX, DI SHRQ $6, DI SHLQ $6, DI ADDQ AX, DI VMOVDQU64 shuffle8x8<>(SB), Z0 VPBROADCASTQ BX, Z2 VPMULLQ scale8x8<>(SB), Z2, Z2 loop8x8: KXORQ K1, K1, K1 KNOTQ K1, K1 VMOVDQU64 (AX), Z1 VPERMB Z1, Z0, 
Z1 VPSCATTERQQ Z1, K1, (DX)(Z2*1) ADDQ $64, AX ADDQ $8, DX CMPQ AX, DI JNE loop8x8 VZEROUPPER CMPQ AX, CX JE done loop1x8: MOVQ (AX), SI MOVQ DX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) SHRQ $8, SI ADDQ BX, DI MOVB SI, (DI) ADDQ $8, AX INCQ DX CMPQ AX, CX JB loop1x8 done: RET // func decodeFloat(dst, src []byte) TEXT ·decodeFloat(SB), NOSPLIT, $0-48 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), BX MOVQ src_base+24(FP), DX MOVQ AX, CX ADDQ BX, CX // end SHRQ $2, BX // len CMPQ BX, $0 JE done CMPB ·decodeFloatHasAVX2(SB), $0 JE loop1x4 CMPQ BX, $8 JB loop1x4 MOVQ CX, DI SUBQ AX, DI SHRQ $5, DI SHLQ $5, DI ADDQ AX, DI MOVQ $0xFFFFFFFF, SI MOVQ BX, X5 MOVQ SI, X6 VMOVDQU shuffle8x4<>(SB), Y0 VPBROADCASTD X5, Y2 VPBROADCASTD X6, Y3 VPMULLD scale8x4<>(SB), Y2, Y2 VPADDD offset8x4<>(SB), Y2, Y2 VMOVDQU Y3, Y4 loop8x4: VPGATHERDD Y4, (DX)(Y2*1), Y1 VPSHUFB Y0, Y1, Y1 VMOVDQU Y1, (AX) VMOVDQU Y3, Y4 ADDQ $32, AX ADDQ $8, DX CMPQ AX, DI JNE loop8x4 VZEROUPPER CMPQ AX, CX JE done loop1x4: MOVQ DX, DI MOVBLZX (DI), R8 ADDQ BX, DI MOVBLZX (DI), R9 ADDQ BX, DI MOVBLZX (DI), R10 ADDQ BX, DI MOVBLZX (DI), R11 SHLL $8, R9 SHLL $16, R10 SHLL $24, R11 ORL R9, R8 ORL R10, R8 ORL R11, R8 MOVL R8, (AX) ADDQ $4, AX INCQ DX CMPQ AX, CX JB loop1x4 done: RET // func decodeDouble(dst, src []byte) TEXT ·decodeDouble(SB), NOSPLIT, $0-48 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), BX MOVQ src_base+24(FP), DX MOVQ AX, CX ADDQ BX, CX SHRQ $3, BX CMPQ BX, $0 JE done CMPB ·decodeDoubleHasAVX512(SB), $0 JE loop1x8 CMPQ BX, $8 JB loop1x8 MOVQ CX, DI SUBQ AX, DI SHRQ $6, DI SHLQ $6, DI ADDQ AX, DI VMOVDQU64 shuffle8x8<>(SB), Z0 VPBROADCASTQ BX, Z2 VPMULLQ scale8x8<>(SB), Z2, Z2 loop8x8: KXORQ K1, K1, K1 KNOTQ K1, K1 VPGATHERQQ (DX)(Z2*1), K1, Z1 VPERMB Z1, Z0, Z1 VMOVDQU64 Z1, (AX) ADDQ $64, AX ADDQ 
$8, DX CMPQ AX, DI JNE loop8x8 VZEROUPPER CMPQ AX, CX JE done loop1x8: MOVQ DX, DI XORQ R12, R12 MOVBQZX (DI), R8 ADDQ BX, DI MOVBQZX (DI), R9 ADDQ BX, DI MOVBQZX (DI), R10 ADDQ BX, DI MOVBQZX (DI), R11 ADDQ BX, DI SHLQ $8, R9 SHLQ $16, R10 SHLQ $24, R11 ORQ R8, R12 ORQ R9, R12 ORQ R10, R12 ORQ R11, R12 MOVBQZX (DI), R8 ADDQ BX, DI MOVBQZX (DI), R9 ADDQ BX, DI MOVBQZX (DI), R10 ADDQ BX, DI MOVBQZX (DI), R11 SHLQ $32, R8 SHLQ $40, R9 SHLQ $48, R10 SHLQ $56, R11 ORQ R8, R12 ORQ R9, R12 ORQ R10, R12 ORQ R11, R12 MOVQ R12, (AX) ADDQ $8, AX INCQ DX CMPQ AX, CX JB loop1x8 done: RET GLOBL scale8x4<>(SB), RODATA|NOPTR, $32 DATA scale8x4<>+0(SB)/4, $0 DATA scale8x4<>+4(SB)/4, $1 DATA scale8x4<>+8(SB)/4, $2 DATA scale8x4<>+12(SB)/4, $3 DATA scale8x4<>+16(SB)/4, $0 DATA scale8x4<>+20(SB)/4, $1 DATA scale8x4<>+24(SB)/4, $2 DATA scale8x4<>+28(SB)/4, $3 GLOBL offset8x4<>(SB), RODATA|NOPTR, $32 DATA offset8x4<>+0(SB)/4, $0 DATA offset8x4<>+4(SB)/4, $0 DATA offset8x4<>+8(SB)/4, $0 DATA offset8x4<>+12(SB)/4, $0 DATA offset8x4<>+16(SB)/4, $4 DATA offset8x4<>+20(SB)/4, $4 DATA offset8x4<>+24(SB)/4, $4 DATA offset8x4<>+28(SB)/4, $4 GLOBL shuffle8x4<>(SB), RODATA|NOPTR, $32 DATA shuffle8x4<>+0(SB)/4, $0x0C080400 DATA shuffle8x4<>+4(SB)/4, $0x0D090501 DATA shuffle8x4<>+8(SB)/4, $0x0E0A0602 DATA shuffle8x4<>+12(SB)/4, $0x0F0B0703 DATA shuffle8x4<>+16(SB)/4, $0x0C080400 DATA shuffle8x4<>+20(SB)/4, $0x0D090501 DATA shuffle8x4<>+24(SB)/4, $0x0E0A0602 DATA shuffle8x4<>+28(SB)/4, $0x0F0B0703 GLOBL scale8x8<>(SB), RODATA|NOPTR, $64 DATA scale8x8<>+0(SB)/8, $0 DATA scale8x8<>+8(SB)/8, $1 DATA scale8x8<>+16(SB)/8, $2 DATA scale8x8<>+24(SB)/8, $3 DATA scale8x8<>+32(SB)/8, $4 DATA scale8x8<>+40(SB)/8, $5 DATA scale8x8<>+48(SB)/8, $6 DATA scale8x8<>+56(SB)/8, $7 GLOBL shuffle8x8<>(SB), RODATA|NOPTR, $64 DATA shuffle8x8<>+0(SB)/8, $0x3830282018100800 DATA shuffle8x8<>+8(SB)/8, $0x3931292119110901 DATA shuffle8x8<>+16(SB)/8, $0x3A322A221A120A02 DATA shuffle8x8<>+24(SB)/8, $0x3B332B231B130B03 DATA 
shuffle8x8<>+32(SB)/8, $0x3C342C241C140C04
DATA shuffle8x8<>+40(SB)/8, $0x3D352D251D150D05
DATA shuffle8x8<>+48(SB)/8, $0x3E362E261E160E06
DATA shuffle8x8<>+56(SB)/8, $0x3F372F271F170F07

================================================
FILE: encoding/bytestreamsplit/bytestreamsplit_purego.go
================================================
//go:build purego || !amd64

package bytestreamsplit

import "github.com/segmentio/parquet-go/internal/unsafecast"

// encodeFloat scatters the raw little-endian bytes of each 32-bit value in
// src into 4 contiguous byte planes of dst: plane k holds byte k of every
// value. dst must be at least len(src) bytes.
func encodeFloat(dst, src []byte) {
	n := len(src) / 4
	b0 := dst[0*n : 1*n]
	b1 := dst[1*n : 2*n]
	b2 := dst[2*n : 3*n]
	b3 := dst[3*n : 4*n]

	for i, v := range unsafecast.BytesToUint32(src) {
		b0[i] = byte(v >> 0)
		b1[i] = byte(v >> 8)
		b2[i] = byte(v >> 16)
		b3[i] = byte(v >> 24)
	}
}

// encodeDouble is the 64-bit analogue of encodeFloat: 8 byte planes, one per
// byte position of each value.
func encodeDouble(dst, src []byte) {
	n := len(src) / 8
	b0 := dst[0*n : 1*n]
	b1 := dst[1*n : 2*n]
	b2 := dst[2*n : 3*n]
	b3 := dst[3*n : 4*n]
	b4 := dst[4*n : 5*n]
	b5 := dst[5*n : 6*n]
	b6 := dst[6*n : 7*n]
	b7 := dst[7*n : 8*n]

	for i, v := range unsafecast.BytesToUint64(src) {
		b0[i] = byte(v >> 0)
		b1[i] = byte(v >> 8)
		b2[i] = byte(v >> 16)
		b3[i] = byte(v >> 24)
		b4[i] = byte(v >> 32)
		b5[i] = byte(v >> 40)
		b6[i] = byte(v >> 48)
		b7[i] = byte(v >> 56)
	}
}

// decodeFloat reassembles each 32-bit value from the 4 byte planes of src;
// inverse of encodeFloat.
func decodeFloat(dst, src []byte) {
	n := len(src) / 4
	b0 := src[0*n : 1*n]
	b1 := src[1*n : 2*n]
	b2 := src[2*n : 3*n]
	b3 := src[3*n : 4*n]

	dst32 := unsafecast.BytesToUint32(dst)
	for i := range dst32 {
		dst32[i] = uint32(b0[i]) |
			uint32(b1[i])<<8 |
			uint32(b2[i])<<16 |
			uint32(b3[i])<<24
	}
}

// decodeDouble reassembles each 64-bit value from the 8 byte planes of src;
// inverse of encodeDouble.
func decodeDouble(dst, src []byte) {
	n := len(src) / 8
	b0 := src[0*n : 1*n]
	b1 := src[1*n : 2*n]
	b2 := src[2*n : 3*n]
	b3 := src[3*n : 4*n]
	b4 := src[4*n : 5*n]
	b5 := src[5*n : 6*n]
	b6 := src[6*n : 7*n]
	b7 := src[7*n : 8*n]

	dst64 := unsafecast.BytesToUint64(dst)
	for i := range dst64 {
		dst64[i] = uint64(b0[i]) |
			uint64(b1[i])<<8 |
			uint64(b2[i])<<16 |
			uint64(b3[i])<<24 |
			uint64(b4[i])<<32 |
			uint64(b5[i])<<40 |
			uint64(b6[i])<<48 |
			uint64(b7[i])<<56
	}
}

================================================
FILE:
encoding/bytestreamsplit/bytestreamsplit_test.go
================================================
//go:build go1.18
// +build go1.18

package bytestreamsplit_test

import (
	"testing"

	"github.com/segmentio/parquet-go/encoding/bytestreamsplit"
	"github.com/segmentio/parquet-go/encoding/fuzz"
	"github.com/segmentio/parquet-go/encoding/test"
)

func FuzzEncodeFloat(f *testing.F) {
	fuzz.EncodeFloat(f, new(bytestreamsplit.Encoding))
}

func FuzzEncodeDouble(f *testing.F) {
	fuzz.EncodeDouble(f, new(bytestreamsplit.Encoding))
}

func TestEncodeFloat(t *testing.T) {
	test.EncodeFloat(t, new(bytestreamsplit.Encoding), 0, 100)
}

func TestEncodeDouble(t *testing.T) {
	test.EncodeDouble(t, new(bytestreamsplit.Encoding), 0, 100)
}

================================================
FILE: encoding/delta/binary_packed.go
================================================
package delta

import (
	"encoding/binary"
	"fmt"
	"io"
	"math"
	"math/bits"

	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
	"github.com/segmentio/parquet-go/internal/bitpack"
	"github.com/segmentio/parquet-go/internal/unsafecast"
)

// BinaryPackedEncoding implements the DELTA_BINARY_PACKED encoding for
// INT32 and INT64 columns.
type BinaryPackedEncoding struct {
	encoding.NotSupported
}

func (e *BinaryPackedEncoding) String() string {
	return "DELTA_BINARY_PACKED"
}

func (e *BinaryPackedEncoding) Encoding() format.Encoding {
	return format.DeltaBinaryPacked
}

// EncodeInt32 delta-binary-packs src into dst (dst is overwritten).
func (e *BinaryPackedEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
	return encodeInt32(dst[:0], src), nil
}

// EncodeInt64 delta-binary-packs src into dst (dst is overwritten).
func (e *BinaryPackedEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
	return encodeInt64(dst[:0], src), nil
}

// DecodeInt32 decodes a delta-binary-packed stream into dst, reusing dst's
// backing storage via a byte-view round trip.
func (e *BinaryPackedEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
	buf := unsafecast.Int32ToBytes(dst)
	buf, _, err := decodeInt32(buf[:0], src)
	return unsafecast.BytesToInt32(buf), e.wrap(err)
}

// DecodeInt64 decodes a delta-binary-packed stream into dst, reusing dst's
// backing storage via a byte-view round trip.
func (e *BinaryPackedEncoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) {
	buf := unsafecast.Int64ToBytes(dst)
	buf, _, err := decodeInt64(buf[:0], src)
	return unsafecast.BytesToInt64(buf), e.wrap(err)
}

// wrap annotates non-nil errors with the encoding that produced them.
func (e *BinaryPackedEncoding) wrap(err error) error {
	if err != nil {
		err = encoding.Error(e, err)
	}
	return err
}

const (
	blockSize     = 128
	numMiniBlocks = 4
	miniBlockSize = blockSize / numMiniBlocks
	// The parquet spec does not enforce a limit to the block size, but we need
	// one otherwise invalid inputs may result in unbounded memory allocations.
	//
	// 65K+ values should be enough for any valid use case.
	maxSupportedBlockSize = 65536

	// Worst-case sizes of the stream header and of one encoded block,
	// used to reserve output space before writing.
	maxHeaderLength32    = 4 * binary.MaxVarintLen64
	maxMiniBlockLength32 = binary.MaxVarintLen64 + numMiniBlocks + (4 * blockSize)

	maxHeaderLength64    = 8 * binary.MaxVarintLen64
	maxMiniBlockLength64 = binary.MaxVarintLen64 + numMiniBlocks + (8 * blockSize)
)

// Function pointers so amd64 builds can swap in AVX2 implementations at init.
var (
	encodeInt32 = encodeInt32Default
	encodeInt64 = encodeInt64Default
)

// encodeInt32Default is the portable DELTA_BINARY_PACKED encoder for int32:
// it writes the stream header, then for each 128-value block computes deltas
// against the previous value, subtracts the block minimum delta, and
// bit-packs each of the 4 mini-blocks at its own bit width.
func encodeInt32Default(dst []byte, src []int32) []byte {
	totalValues := len(src)
	firstValue := int32(0)
	if totalValues > 0 {
		firstValue = src[0]
	}

	n := len(dst)
	dst = resize(dst, n+maxHeaderLength32)
	dst = dst[:n+encodeBinaryPackedHeader(dst[n:], blockSize, numMiniBlocks, totalValues, int64(firstValue))]

	// The first value is carried in the header; with fewer than 2 values
	// there are no deltas to encode.
	if totalValues < 2 {
		return dst
	}

	lastValue := firstValue
	for i := 1; i < len(src); i += blockSize {
		block := [blockSize]int32{}
		blockLength := copy(block[:], src[i:])

		lastValue = blockDeltaInt32(&block, lastValue)
		minDelta := blockMinInt32(&block)
		blockSubInt32(&block, minDelta)
		// Zero the tail of a short final block so it does not inflate the
		// mini-block bit widths.
		blockClearInt32(&block, blockLength)

		bitWidths := [numMiniBlocks]byte{}
		blockBitWidthsInt32(&bitWidths, &block)

		n := len(dst)
		dst = resize(dst, n+maxMiniBlockLength32+4)
		n += encodeBlockHeader(dst[n:], int64(minDelta), bitWidths)

		for i, bitWidth := range bitWidths {
			if bitWidth != 0 {
				miniBlock := (*[miniBlockSize]int32)(block[i*miniBlockSize:])
				encodeMiniBlockInt32(dst[n:], miniBlock, uint(bitWidth))
				n += (miniBlockSize * int(bitWidth)) / 8
			}
		}

		dst = dst[:n]
	}
	return dst
}

// encodeInt64Default mirrors encodeInt32Default for int64 values.
func encodeInt64Default(dst []byte, src []int64) []byte {
	totalValues := len(src)
	firstValue := int64(0)
	if totalValues >
decodeBinaryPackedBlock(src, numMiniBlocks)
		if err != nil {
			return dst, src, err
		}
		blockOffset := writeOffset

		for _, bitWidth := range bitWidths {
			n := min(numValuesInMiniBlock, totalValues)
			if bitWidth != 0 {
				miniBlockSize := (numValuesInMiniBlock * int(bitWidth)) / 8
				miniBlockData := src
				if miniBlockSize <= len(src) {
					miniBlockData = miniBlockData[:miniBlockSize]
				}
				src = src[len(miniBlockData):]
				// bitpack.UnpackInt32 may read a few bytes past the packed
				// data, so copy into a padded scratch buffer when the input
				// does not have the required spare capacity.
				// NOTE(review): this checks cap() while the int64 variant
				// below checks len() — confirm which is intended.
				if cap(miniBlockData) < miniBlockSize+bitpack.PaddingInt32 {
					miniBlockTemp = resize(miniBlockTemp[:0], miniBlockSize+bitpack.PaddingInt32)
					miniBlockData = miniBlockTemp[:copy(miniBlockTemp, miniBlockData)]
				}
				miniBlockData = miniBlockData[:miniBlockSize]
				bitpack.UnpackInt32(out[writeOffset:writeOffset+n], miniBlockData, uint(bitWidth))
			}
			writeOffset += n
			totalValues -= n
			if totalValues == 0 {
				break
			}
		}

		// Reconstruct absolute values from the unpacked deltas of this block.
		lastValue = decodeBlockInt32(out[blockOffset:writeOffset], int32(minDelta), lastValue)
	}

	if totalValues > 0 {
		return dst, src, fmt.Errorf("%d missing values: %w", totalValues, io.ErrUnexpectedEOF)
	}

	return dst, src, nil
}

// decodeInt64 decodes a DELTA_BINARY_PACKED stream of int64 values appended
// to dst (as raw bytes), returning the grown dst, the unread remainder of
// src, and an error for truncated or malformed input.
func decodeInt64(dst, src []byte) ([]byte, []byte, error) {
	blockSize, numMiniBlocks, totalValues, firstValue, src, err := decodeBinaryPackedHeader(src)
	if err != nil {
		return dst, src, err
	}
	if totalValues == 0 {
		return dst, src, nil
	}

	// NOTE(review): writeOffset is len(dst) in bytes but is used as an int64
	// element index below; callers pass dst with length 0 (see DecodeInt64),
	// where both coincide — confirm before reusing with a non-empty dst.
	writeOffset := len(dst)
	dst = resize(dst, len(dst)+8*totalValues)
	out := unsafecast.BytesToInt64(dst)
	out[writeOffset] = firstValue
	writeOffset++
	totalValues--

	lastValue := firstValue
	numValuesInMiniBlock := blockSize / numMiniBlocks

	const padding = 16
	miniBlockTemp := make([]byte, 512+padding)

	for totalValues > 0 && len(src) > 0 {
		var minDelta int64
		var bitWidths []byte
		minDelta, bitWidths, src, err = decodeBinaryPackedBlock(src, numMiniBlocks)
		if err != nil {
			return dst, src, err
		}
		blockOffset := writeOffset

		for _, bitWidth := range bitWidths {
			n := min(numValuesInMiniBlock, totalValues)
			if bitWidth != 0 {
				miniBlockSize := (numValuesInMiniBlock * int(bitWidth)) / 8
				miniBlockData := src
				if miniBlockSize <= len(src) {
					miniBlockData = src[:miniBlockSize]
				}
				src = src[len(miniBlockData):]
				// Padded-copy fallback; see the int32 variant above for the
				// cap/len discrepancy between the two.
				if len(miniBlockData) < miniBlockSize+bitpack.PaddingInt64 {
					miniBlockTemp = resize(miniBlockTemp[:0], miniBlockSize+bitpack.PaddingInt64)
					miniBlockData = miniBlockTemp[:copy(miniBlockTemp, miniBlockData)]
				}
				miniBlockData = miniBlockData[:miniBlockSize]
				bitpack.UnpackInt64(out[writeOffset:writeOffset+n], miniBlockData, uint(bitWidth))
			}
			writeOffset += n
			totalValues -= n
			if totalValues == 0 {
				break
			}
		}

		lastValue = decodeBlockInt64(out[blockOffset:writeOffset], minDelta, lastValue)
	}

	if totalValues > 0 {
		return dst, src, fmt.Errorf("%d missing values: %w", totalValues, io.ErrUnexpectedEOF)
	}

	return dst, src, nil
}

// decodeBinaryPackedHeader reads and validates the stream header:
// <block size> <number of mini-blocks> <total values> <first value>.
func decodeBinaryPackedHeader(src []byte) (blockSize, numMiniBlocks, totalValues int, firstValue int64, next []byte, err error) {
	u := uint64(0)
	n := 0
	i := 0

	if u, n, err = decodeUvarint(src[i:], "block size"); err != nil {
		return
	}
	i += n
	blockSize = int(u)

	if u, n, err = decodeUvarint(src[i:], "number of mini-blocks"); err != nil {
		return
	}
	i += n
	numMiniBlocks = int(u)

	if u, n, err = decodeUvarint(src[i:], "total values"); err != nil {
		return
	}
	i += n
	totalValues = int(u)

	if firstValue, n, err = decodeVarint(src[i:], "first value"); err != nil {
		return
	}
	i += n

	// Reject malformed headers: divisibility constraints come from the
	// parquet spec, the size cap bounds memory allocation on bad input.
	if numMiniBlocks == 0 {
		err = fmt.Errorf("invalid number of mini block (%d)", numMiniBlocks)
	} else if (blockSize <= 0) || (blockSize%128) != 0 {
		err = fmt.Errorf("invalid block size is not a multiple of 128 (%d)", blockSize)
	} else if blockSize > maxSupportedBlockSize {
		err = fmt.Errorf("invalid block size is too large (%d)", blockSize)
	} else if miniBlockSize := blockSize / numMiniBlocks; (numMiniBlocks <= 0) || (miniBlockSize%32) != 0 {
		err = fmt.Errorf("invalid mini block size is not a multiple of 32 (%d)", miniBlockSize)
	} else if totalValues < 0 {
		err = fmt.Errorf("invalid total number of values is negative (%d)", totalValues)
	} else if totalValues > math.MaxInt32 {
		err = fmt.Errorf("too many values: %d", totalValues)
	}

	return blockSize, numMiniBlocks, totalValues, firstValue, src[i:], err
}

// decodeBinaryPackedBlock reads a block header: the min delta varint
// followed by one bit-width byte per mini-block (truncated input yields a
// short bitWidths slice and nil next).
func decodeBinaryPackedBlock(src []byte, numMiniBlocks int) (minDelta int64, bitWidths, next []byte, err error) {
	minDelta, n, err := decodeVarint(src, "min delta")
	if err != nil {
		return 0, nil, src, err
	}
	src = src[n:]
	if len(src) < numMiniBlocks {
		bitWidths, next = src, nil
	} else {
		bitWidths, next = src[:numMiniBlocks], src[numMiniBlocks:]
	}
	return minDelta, bitWidths, next, nil
}

// decodeUvarint wraps binary.Uvarint with descriptive truncation/overflow
// errors mentioning the field being decoded.
func decodeUvarint(buf []byte, what string) (u uint64, n int, err error) {
	u, n = binary.Uvarint(buf)
	if n == 0 {
		return 0, 0, fmt.Errorf("decoding %s: %w", what, io.ErrUnexpectedEOF)
	}
	if n < 0 {
		return 0, 0, fmt.Errorf("overflow decoding %s (read %d/%d bytes)", what, -n, len(buf))
	}
	return u, n, nil
}

// decodeVarint is the signed analogue of decodeUvarint.
func decodeVarint(buf []byte, what string) (v int64, n int, err error) {
	v, n = binary.Varint(buf)
	if n == 0 {
		return 0, 0, fmt.Errorf("decoding %s: %w", what, io.ErrUnexpectedEOF)
	}
	if n < 0 {
		return 0, 0, fmt.Errorf("overflow decoding %s (read %d/%d bytes)", what, -n, len(buf))
	}
	return v, n, nil
}

================================================
FILE: encoding/delta/binary_packed_amd64.go
================================================
//go:build !purego

package delta

import (
	"github.com/segmentio/parquet-go/internal/unsafecast"
	"golang.org/x/sys/cpu"
)

// Swap in the AVX2 encoders when the CPU supports them.
func init() {
	if cpu.X86.HasAVX2 {
		encodeInt32 = encodeInt32AVX2
		encodeInt64 = encodeInt64AVX2
	}
}

//go:noescape
func blockDeltaInt32AVX2(block *[blockSize]int32, lastValue int32) int32

//go:noescape
func blockMinInt32AVX2(block *[blockSize]int32) int32

//go:noescape
func blockSubInt32AVX2(block *[blockSize]int32, value int32)

//go:noescape
func blockBitWidthsInt32AVX2(bitWidths *[numMiniBlocks]byte, block *[blockSize]int32)

//go:noescape
func encodeMiniBlockInt32Default(dst *byte, src *[miniBlockSize]int32, bitWidth uint)

//go:noescape
func encodeMiniBlockInt32x1bitAVX2(dst *byte, src *[miniBlockSize]int32)

//go:noescape
func encodeMiniBlockInt32x2bitsAVX2(dst *byte, src *[miniBlockSize]int32)

//go:noescape
func encodeMiniBlockInt32x3to16bitsAVX2(dst *byte, src *[miniBlockSize]int32, bitWidth uint)

//go:noescape
func encodeMiniBlockInt32x32bitsAVX2(dst *byte, src *[miniBlockSize]int32)

// encodeMiniBlockInt32 is the non-AVX entry point; it always uses the
// default assembly routine.
func encodeMiniBlockInt32(dst []byte, src *[miniBlockSize]int32, bitWidth uint) {
	encodeMiniBlockInt32Default(&dst[0], src, bitWidth)
}

// encodeMiniBlockInt32AVX2 dispatches to the specialized AVX2 kernel for the
// given bit width, falling back to the default routine otherwise.
func encodeMiniBlockInt32AVX2(dst *byte, src *[miniBlockSize]int32, bitWidth uint) {
	switch {
	case bitWidth == 1:
		encodeMiniBlockInt32x1bitAVX2(dst, src)
	case bitWidth == 2:
		encodeMiniBlockInt32x2bitsAVX2(dst, src)
	case bitWidth == 32:
		encodeMiniBlockInt32x32bitsAVX2(dst, src)
	case bitWidth <= 16:
		encodeMiniBlockInt32x3to16bitsAVX2(dst, src, bitWidth)
	default:
		encodeMiniBlockInt32Default(dst, src, bitWidth)
	}
}

// encodeInt32AVX2 mirrors encodeInt32Default but uses the AVX2 block helpers
// and mini-block kernels.
func encodeInt32AVX2(dst []byte, src []int32) []byte {
	totalValues := len(src)
	firstValue := int32(0)
	if totalValues > 0 {
		firstValue = src[0]
	}

	n := len(dst)
	dst = resize(dst, n+maxHeaderLength32)
	dst = dst[:n+encodeBinaryPackedHeader(dst[n:], blockSize, numMiniBlocks, totalValues, int64(firstValue))]

	if totalValues < 2 {
		return dst
	}

	lastValue := firstValue
	for i := 1; i < len(src); i += blockSize {
		block := [blockSize]int32{}
		blockLength := copy(block[:], src[i:])

		lastValue = blockDeltaInt32AVX2(&block, lastValue)
		minDelta := blockMinInt32AVX2(&block)
		blockSubInt32AVX2(&block, minDelta)
		blockClearInt32(&block, blockLength)

		bitWidths := [numMiniBlocks]byte{}
		blockBitWidthsInt32AVX2(&bitWidths, &block)

		n := len(dst)
		dst = resize(dst, n+maxMiniBlockLength32+16)
		n += encodeBlockHeader(dst[n:], int64(minDelta), bitWidths)

		for i, bitWidth := range bitWidths {
			if bitWidth != 0 {
				miniBlock := (*[miniBlockSize]int32)(block[i*miniBlockSize:])
				encodeMiniBlockInt32AVX2(&dst[n], miniBlock, uint(bitWidth))
				n += (miniBlockSize * int(bitWidth)) / 8
			}
		}

		dst = dst[:n]
	}
	return dst
}

//go:noescape
func blockDeltaInt64AVX2(block *[blockSize]int64, lastValue int64) int64

//go:noescape
func blockMinInt64AVX2(block *[blockSize]int64) int64

//go:noescape
func blockSubInt64AVX2(block *[blockSize]int64, value int64)

//go:noescape
func blockBitWidthsInt64AVX2(bitWidths *[numMiniBlocks]byte, block *[blockSize]int64)

//go:noescape
func encodeMiniBlockInt64Default(dst *byte, src *[miniBlockSize]int64, bitWidth uint)

//go:noescape
func encodeMiniBlockInt64x1bitAVX2(dst *byte, src *[miniBlockSize]int64)

//go:noescape
func encodeMiniBlockInt64x2bitsAVX2(dst *byte, src *[miniBlockSize]int64)

//go:noescape
func encodeMiniBlockInt64x64bitsAVX2(dst *byte, src *[miniBlockSize]int64)

// encodeMiniBlockInt64 is the non-AVX entry point for int64 mini-blocks.
func encodeMiniBlockInt64(dst []byte, src *[miniBlockSize]int64, bitWidth uint) {
	encodeMiniBlockInt64Default(&dst[0], src, bitWidth)
}

// encodeMiniBlockInt64AVX2 dispatches to the specialized AVX2 kernel for the
// given bit width (only 1, 2 and 64 have dedicated kernels).
func encodeMiniBlockInt64AVX2(dst *byte, src *[miniBlockSize]int64, bitWidth uint) {
	switch {
	case bitWidth == 1:
		encodeMiniBlockInt64x1bitAVX2(dst, src)
	case bitWidth == 2:
		encodeMiniBlockInt64x2bitsAVX2(dst, src)
	case bitWidth == 64:
		encodeMiniBlockInt64x64bitsAVX2(dst, src)
	default:
		encodeMiniBlockInt64Default(dst, src, bitWidth)
	}
}

// encodeInt64AVX2 mirrors encodeInt64Default but uses the AVX2 block helpers
// and mini-block kernels.
func encodeInt64AVX2(dst []byte, src []int64) []byte {
	totalValues := len(src)
	firstValue := int64(0)
	if totalValues > 0 {
		firstValue = src[0]
	}

	n := len(dst)
	dst = resize(dst, n+maxHeaderLength64)
	dst = dst[:n+encodeBinaryPackedHeader(dst[n:], blockSize, numMiniBlocks, totalValues, int64(firstValue))]

	if totalValues < 2 {
		return dst
	}

	lastValue := firstValue
	for i := 1; i < len(src); i += blockSize {
		block := [blockSize]int64{}
		blockLength := copy(block[:], src[i:])

		lastValue = blockDeltaInt64AVX2(&block, lastValue)
		minDelta := blockMinInt64AVX2(&block)
		blockSubInt64AVX2(&block, minDelta)
		blockClearInt64(&block, blockLength)

		bitWidths := [numMiniBlocks]byte{}
		blockBitWidthsInt64AVX2(&bitWidths, &block)

		n := len(dst)
		dst = resize(dst, n+maxMiniBlockLength64+16)
		n += encodeBlockHeader(dst[n:], int64(minDelta), bitWidths)

		for i, bitWidth := range bitWidths {
			if bitWidth != 0 {
				miniBlock := (*[miniBlockSize]int64)(block[i*miniBlockSize:])
				encodeMiniBlockInt64AVX2(&dst[n], miniBlock, uint(bitWidth))
				n += (miniBlockSize * int(bitWidth)) / 8
			}
		}

		dst = dst[:n]
	}
	return dst
}

//go:noescape
func decodeBlockInt32Default(dst []int32, minDelta, lastValue int32) int32

//go:noescape
func decodeBlockInt32AVX2(dst []int32, minDelta, lastValue int32) int32

// decodeBlockInt32 turns a block of deltas back into absolute values,
// picking the AVX2 kernel when available.
func decodeBlockInt32(dst []int32, minDelta, lastValue int32) int32 {
	switch {
	case cpu.X86.HasAVX2:
		return decodeBlockInt32AVX2(dst, minDelta, lastValue)
	default:
		return decodeBlockInt32Default(dst, minDelta, lastValue)
	}
}

//go:noescape
func decodeMiniBlockInt32Default(dst []int32, src []uint32, bitWidth uint)

//go:noescape
func decodeMiniBlockInt32x1to16bitsAVX2(dst []int32, src []uint32, bitWidth uint)

//go:noescape
func decodeMiniBlockInt32x17to26bitsAVX2(dst []int32, src []uint32, bitWidth uint)

//go:noescape
func decodeMiniBlockInt32x27to31bitsAVX2(dst []int32, src []uint32, bitWidth uint)

// decodeMiniBlockInt32 unpacks bit-packed int32 values, dispatching on the
// bit width and AVX2 availability; width 32 is a plain copy.
func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) {
	hasAVX2 := cpu.X86.HasAVX2
	switch {
	case hasAVX2 && bitWidth <= 16:
		decodeMiniBlockInt32x1to16bitsAVX2(dst, src, bitWidth)
	case hasAVX2 && bitWidth <= 26:
		decodeMiniBlockInt32x17to26bitsAVX2(dst, src, bitWidth)
	case hasAVX2 && bitWidth <= 31:
		decodeMiniBlockInt32x27to31bitsAVX2(dst, src, bitWidth)
	case bitWidth == 32:
		copy(dst, unsafecast.Uint32ToInt32(src))
	default:
		decodeMiniBlockInt32Default(dst, src, bitWidth)
	}
}

//go:noescape
func decodeBlockInt64Default(dst []int64, minDelta, lastValue int64) int64

// decodeBlockInt64 turns a block of int64 deltas back into absolute values
// (no AVX2 variant exists for this routine).
func decodeBlockInt64(dst []int64, minDelta, lastValue int64) int64 {
	return decodeBlockInt64Default(dst, minDelta, lastValue)
}

//go:noescape
func decodeMiniBlockInt64Default(dst []int64, src []uint32, bitWidth uint)

// decodeMiniBlockInt64 unpacks bit-packed int64 values; width 64 is a plain
// copy, everything else goes through the default assembly routine.
func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) {
	switch {
	case bitWidth == 64:
		copy(dst, unsafecast.Uint32ToInt64(src))
	default:
		decodeMiniBlockInt64Default(dst, src, bitWidth)
	}
}
================================================ FILE: encoding/delta/binary_packed_amd64.s ================================================ //go:build !purego #include "textflag.h" #define blockSize 128 #define numMiniBlocks 4 #define miniBlockSize 32 // ----------------------------------------------------------------------------- // 32 bits // ----------------------------------------------------------------------------- #define deltaInt32AVX2x8(baseAddr) \ VMOVDQU baseAddr, Y1 \ // [0,1,2,3,4,5,6,7] VPERMD Y1, Y3, Y2 \ // [7,0,1,2,3,4,5,6] VPBLENDD $1, Y0, Y2, Y2 \ // [x,0,1,2,3,4,5,6] VPSUBD Y2, Y1, Y2 \ // [0,1,2,...] - [x,0,1,...] VMOVDQU Y2, baseAddr \ VPERMD Y1, Y3, Y0 // func blockDeltaInt32AVX2(block *[blockSize]int32, lastValue int32) int32 TEXT ·blockDeltaInt32AVX2(SB), NOSPLIT, $0-20 MOVQ block+0(FP), AX MOVL 4*blockSize-4(AX), CX MOVL CX, ret+16(FP) VPBROADCASTD lastValue+8(FP), Y0 VMOVDQU ·rotateLeft32(SB), Y3 XORQ SI, SI loop: deltaInt32AVX2x8(0(AX)(SI*4)) deltaInt32AVX2x8(32(AX)(SI*4)) deltaInt32AVX2x8(64(AX)(SI*4)) deltaInt32AVX2x8(96(AX)(SI*4)) ADDQ $32, SI CMPQ SI, $blockSize JNE loop VZEROUPPER RET // func blockMinInt32AVX2(block *[blockSize]int32) int32 TEXT ·blockMinInt32AVX2(SB), NOSPLIT, $0-12 MOVQ block+0(FP), AX VPBROADCASTD (AX), Y15 VPMINSD 0(AX), Y15, Y0 VPMINSD 32(AX), Y15, Y1 VPMINSD 64(AX), Y15, Y2 VPMINSD 96(AX), Y15, Y3 VPMINSD 128(AX), Y15, Y4 VPMINSD 160(AX), Y15, Y5 VPMINSD 192(AX), Y15, Y6 VPMINSD 224(AX), Y15, Y7 VPMINSD 256(AX), Y15, Y8 VPMINSD 288(AX), Y15, Y9 VPMINSD 320(AX), Y15, Y10 VPMINSD 352(AX), Y15, Y11 VPMINSD 384(AX), Y15, Y12 VPMINSD 416(AX), Y15, Y13 VPMINSD 448(AX), Y15, Y14 VPMINSD 480(AX), Y15, Y15 VPMINSD Y1, Y0, Y0 VPMINSD Y3, Y2, Y2 VPMINSD Y5, Y4, Y4 VPMINSD Y7, Y6, Y6 VPMINSD Y9, Y8, Y8 VPMINSD Y11, Y10, Y10 VPMINSD Y13, Y12, Y12 VPMINSD Y15, Y14, Y14 VPMINSD Y2, Y0, Y0 VPMINSD Y6, Y4, Y4 VPMINSD Y10, Y8, Y8 VPMINSD Y14, Y12, Y12 VPMINSD Y4, Y0, Y0 VPMINSD Y12, Y8, Y8 VPMINSD Y8, Y0, Y0 VPERM2I128 $1, Y0, 
Y0, Y1 VPMINSD Y1, Y0, Y0 VPSHUFD $0b00011011, Y0, Y1 VPMINSD Y1, Y0, Y0 VZEROUPPER MOVQ X0, CX MOVL CX, BX SHRQ $32, CX CMPL CX, BX CMOVLLT CX, BX MOVL BX, ret+8(FP) RET #define subInt32AVX2x32(baseAddr, offset) \ VMOVDQU offset+0(baseAddr), Y1 \ VMOVDQU offset+32(baseAddr), Y2 \ VMOVDQU offset+64(baseAddr), Y3 \ VMOVDQU offset+96(baseAddr), Y4 \ VPSUBD Y0, Y1, Y1 \ VPSUBD Y0, Y2, Y2 \ VPSUBD Y0, Y3, Y3 \ VPSUBD Y0, Y4, Y4 \ VMOVDQU Y1, offset+0(baseAddr) \ VMOVDQU Y2, offset+32(baseAddr) \ VMOVDQU Y3, offset+64(baseAddr) \ VMOVDQU Y4, offset+96(baseAddr) // func blockSubInt32AVX2(block *[blockSize]int32, value int32) TEXT ·blockSubInt32AVX2(SB), NOSPLIT, $0-12 MOVQ block+0(FP), AX VPBROADCASTD value+8(FP), Y0 subInt32AVX2x32(AX, 0) subInt32AVX2x32(AX, 128) subInt32AVX2x32(AX, 256) subInt32AVX2x32(AX, 384) VZEROUPPER RET // func blockBitWidthsInt32AVX2(bitWidths *[numMiniBlocks]byte, block *[blockSize]int32) TEXT ·blockBitWidthsInt32AVX2(SB), NOSPLIT, $0-16 MOVQ bitWidths+0(FP), AX MOVQ block+8(FP), BX // AVX2 only has signed comparisons (and min/max), we emulate working on // unsigned values by adding -2^31 to the values. Y5 is a vector of -2^31 // used to offset 8 packed 32 bits integers in other YMM registers where // the block data are loaded. 
VPCMPEQD Y5, Y5, Y5 VPSLLD $31, Y5, Y5 XORQ DI, DI loop: VPBROADCASTD (BX), Y0 // max VPADDD Y5, Y0, Y0 VMOVDQU (BX), Y1 VMOVDQU 32(BX), Y2 VMOVDQU 64(BX), Y3 VMOVDQU 96(BX), Y4 VPADDD Y5, Y1, Y1 VPADDD Y5, Y2, Y2 VPADDD Y5, Y3, Y3 VPADDD Y5, Y4, Y4 VPMAXSD Y2, Y1, Y1 VPMAXSD Y4, Y3, Y3 VPMAXSD Y3, Y1, Y1 VPMAXSD Y1, Y0, Y0 VPERM2I128 $1, Y0, Y0, Y1 VPMAXSD Y1, Y0, Y0 VPSHUFD $0b00011011, Y0, Y1 VPMAXSD Y1, Y0, Y0 VPSUBD Y5, Y0, Y0 MOVQ X0, CX MOVL CX, DX SHRQ $32, CX CMPL CX, DX CMOVLHI CX, DX LZCNTL DX, DX NEGL DX ADDL $32, DX MOVB DX, (AX)(DI*1) ADDQ $128, BX INCQ DI CMPQ DI, $numMiniBlocks JNE loop VZEROUPPER RET // encodeMiniBlockInt32Default is the generic implementation of the algorithm to // pack 32 bit integers into values of a given bit width (<=32). // // This algorithm is much slower than the vectorized versions, but is useful // as a reference implementation to run the tests against, and as fallback when // the code runs on a CPU which does not support the AVX2 instruction set. // // func encodeMiniBlockInt32Default(dst *byte, src *[miniBlockSize]int32, bitWidth uint) TEXT ·encodeMiniBlockInt32Default(SB), NOSPLIT, $0-24 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX MOVQ bitWidth+16(FP), R9 XORQ DI, DI // bitOffset XORQ SI, SI loop: MOVQ DI, CX MOVQ DI, DX ANDQ $0b11111, CX // bitOffset % 32 SHRQ $5, DX // bitOffset / 32 MOVLQZX (BX)(SI*4), R8 SHLQ CX, R8 ORQ R8, (AX)(DX*4) ADDQ R9, DI INCQ SI CMPQ SI, $miniBlockSize JNE loop RET // encodeMiniBlockInt32x1bitAVX2 packs 32 bit integers into 1 bit values in the // the output buffer. // // The algorithm uses MOVMSKPS to extract the 8 relevant bits from the 8 values // packed in YMM registers, then combines 4 of these into a 32 bit word which // then gets written to the output. The result is 32 bits because each mini // block has 32 values (the block size is 128 and there are 4 mini blocks per // block). 
// // func encodeMiniBlockInt32x1bitAVX2(dst *byte, src *[miniBlockSize]int32) TEXT ·encodeMiniBlockInt32x1bitAVX2(SB), NOSPLIT, $0-16 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX VMOVDQU 0(BX), Y0 VMOVDQU 32(BX), Y1 VMOVDQU 64(BX), Y2 VMOVDQU 96(BX), Y3 VPSLLD $31, Y0, Y0 VPSLLD $31, Y1, Y1 VPSLLD $31, Y2, Y2 VPSLLD $31, Y3, Y3 VMOVMSKPS Y0, R8 VMOVMSKPS Y1, R9 VMOVMSKPS Y2, R10 VMOVMSKPS Y3, R11 SHLL $8, R9 SHLL $16, R10 SHLL $24, R11 ORL R9, R8 ORL R10, R8 ORL R11, R8 MOVL R8, (AX) VZEROUPPER RET // encodeMiniBlockInt32x2bitsAVX2 implements an algorithm for packing 32 bit // integers into 2 bit values. // // The algorithm is derived from the one employed in encodeMiniBlockInt32x1bitAVX2 // but needs to perform a bit extra work since MOVMSKPS can only extract one bit // per packed integer of each YMM vector. We run two passes to extract the two // bits needed to compose each item of the result, and merge the values by // interleaving the first and second bits with PDEP. // // func encodeMiniBlockInt32x2bitsAVX2(dst *byte, src *[miniBlockSize]int32) TEXT ·encodeMiniBlockInt32x2bitsAVX2(SB), NOSPLIT, $0-16 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX VMOVDQU 0(BX), Y0 VMOVDQU 32(BX), Y1 VMOVDQU 64(BX), Y2 VMOVDQU 96(BX), Y3 VPSLLD $31, Y0, Y4 VPSLLD $31, Y1, Y5 VPSLLD $31, Y2, Y6 VPSLLD $31, Y3, Y7 VMOVMSKPS Y4, R8 VMOVMSKPS Y5, R9 VMOVMSKPS Y6, R10 VMOVMSKPS Y7, R11 SHLQ $8, R9 SHLQ $16, R10 SHLQ $24, R11 ORQ R9, R8 ORQ R10, R8 ORQ R11, R8 MOVQ $0x5555555555555555, DX // 0b010101... PDEPQ DX, R8, R8 VPSLLD $30, Y0, Y8 VPSLLD $30, Y1, Y9 VPSLLD $30, Y2, Y10 VPSLLD $30, Y3, Y11 VMOVMSKPS Y8, R12 VMOVMSKPS Y9, R13 VMOVMSKPS Y10, R14 VMOVMSKPS Y11, R15 SHLQ $8, R13 SHLQ $16, R14 SHLQ $24, R15 ORQ R13, R12 ORQ R14, R12 ORQ R15, R12 MOVQ $0xAAAAAAAAAAAAAAAA, DI // 0b101010... 
PDEPQ DI, R12, R12 ORQ R12, R8 MOVQ R8, (AX) VZEROUPPER RET // encodeMiniBlockInt32x32bitsAVX2 is a specialization of the bit packing logic // for 32 bit integers when the output bit width is also 32, in which case a // simple copy of the mini block to the output buffer produces the result. // // func encodeMiniBlockInt32x32bitsAVX2(dst *byte, src *[miniBlockSize]int32) TEXT ·encodeMiniBlockInt32x32bitsAVX2(SB), NOSPLIT, $0-16 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX VMOVDQU 0(BX), Y0 VMOVDQU 32(BX), Y1 VMOVDQU 64(BX), Y2 VMOVDQU 96(BX), Y3 VMOVDQU Y0, 0(AX) VMOVDQU Y1, 32(AX) VMOVDQU Y2, 64(AX) VMOVDQU Y3, 96(AX) VZEROUPPER RET // encodeMiniBlockInt32x3to16bitsAVX2 is the algorithm used to bit-pack 32 bit // integers into values of width 3 to 16 bits. // // This function is a small overhead due to having to initialize registers with // values that depend on the bit width. We measured this cost at ~10% throughput // in synthetic benchmarks compared to generating constant shifts and offsets // using a macro. Using a single function rather than generating one for each // bit width has the benefit of reducing the code size, which in practice can // also yield benefits like reducing CPU cache misses. Not using a macro also // has other advantages like providing accurate line number of stack traces and // enabling the use of breakpoints when debugging. Overall, this approach seemed // to be the right trade off between performance and maintainability. // // The algorithm treats chunks of 8 values in 4 iterations to process all 32 // values of the mini block. Writes to the output buffer are aligned on 128 bits // since we may write up to 128 bits (8 x 16 bits). Padding is therefore // required in the output buffer to avoid triggering a segfault. // The encodeInt32AVX2 method adds enough padding when sizing the output buffer // to account for this requirement. 
// // We leverage the two lanes of YMM registers to work on two sets of 4 values // (in the sequence of VMOVDQU/VPSHUFD, VPAND, VPSLLQ, VPOR), resulting in having // two sets of bit-packed values in the lower 64 bits of each YMM lane. // The upper lane is then permuted into a lower lane to merge the two results, // which may not be aligned on byte boundaries so we shift the lower and upper // bits and compose two sets of 128 bits sequences (VPSLLQ, VPSRLQ, VBLENDPD), // merge them and write the 16 bytes result to the output buffer. TEXT ·encodeMiniBlockInt32x3to16bitsAVX2(SB), NOSPLIT, $0-24 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX MOVQ bitWidth+16(FP), CX VPBROADCASTQ bitWidth+16(FP), Y6 // [1*bitWidth...] VPSLLQ $1, Y6, Y7 // [2*bitWidth...] VPADDQ Y6, Y7, Y8 // [3*bitWidth...] VPSLLQ $2, Y6, Y9 // [4*bitWidth...] VPBROADCASTQ sixtyfour<>(SB), Y10 VPSUBQ Y6, Y10, Y11 // [64-1*bitWidth...] VPSUBQ Y9, Y10, Y12 // [64-4*bitWidth...] VPCMPEQQ Y4, Y4, Y4 VPSRLVQ Y11, Y4, Y4 VPXOR Y5, Y5, Y5 XORQ SI, SI loop: VMOVDQU (BX)(SI*4), Y0 VPSHUFD $0b01010101, Y0, Y1 VPSHUFD $0b10101010, Y0, Y2 VPSHUFD $0b11111111, Y0, Y3 VPAND Y4, Y0, Y0 VPAND Y4, Y1, Y1 VPAND Y4, Y2, Y2 VPAND Y4, Y3, Y3 VPSLLVQ Y6, Y1, Y1 VPSLLVQ Y7, Y2, Y2 VPSLLVQ Y8, Y3, Y3 VPOR Y1, Y0, Y0 VPOR Y3, Y2, Y2 VPOR Y2, Y0, Y0 VPERMQ $0b00001010, Y0, Y1 VPSLLVQ X9, X1, X2 VPSRLQ X12, X1, X3 VBLENDPD $0b10, X3, X2, X1 VBLENDPD $0b10, X5, X0, X0 VPOR X1, X0, X0 VMOVDQU X0, (AX) ADDQ CX, AX ADDQ $8, SI CMPQ SI, $miniBlockSize JNE loop VZEROUPPER RET GLOBL sixtyfour<>(SB), RODATA|NOPTR, $32 DATA sixtyfour<>+0(SB)/8, $64 DATA sixtyfour<>+8(SB)/8, $64 DATA sixtyfour<>+16(SB)/8, $64 DATA sixtyfour<>+24(SB)/8, $64 // func decodeBlockInt32Default(dst []int32, minDelta, lastValue int32) int32 TEXT ·decodeBlockInt32Default(SB), NOSPLIT, $0-36 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), BX MOVLQZX minDelta+24(FP), CX MOVLQZX lastValue+28(FP), DX XORQ SI, SI JMP test loop: MOVL (AX)(SI*4), DI ADDL CX, DI ADDL DI, DX MOVL DX, 
(AX)(SI*4) INCQ SI test: CMPQ SI, BX JNE loop done: MOVL DX, ret+32(FP) RET // func decodeBlockInt32AVX2(dst []int32, minDelta, lastValue int32) int32 TEXT ·decodeBlockInt32AVX2(SB), NOSPLIT, $0-36 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), BX MOVLQZX minDelta+24(FP), CX MOVLQZX lastValue+28(FP), DX XORQ SI, SI CMPQ BX, $8 JB test MOVQ BX, DI SHRQ $3, DI SHLQ $3, DI VPXOR X1, X1, X1 MOVQ CX, X0 MOVQ DX, X1 VPBROADCASTD X0, Y0 loopAVX2: VMOVDQU (AX)(SI*4), Y2 VPADDD Y0, Y2, Y2 // Y2[:] += minDelta VPADDD Y1, Y2, Y2 // Y2[0] += lastValue VPSLLDQ $4, Y2, Y3 VPADDD Y3, Y2, Y2 VPSLLDQ $8, Y2, Y3 VPADDD Y3, Y2, Y2 VPSHUFD $0xFF, X2, X1 VPERM2I128 $1, Y2, Y2, Y3 VPADDD X1, X3, X3 VMOVDQU X2, (AX)(SI*4) VMOVDQU X3, 16(AX)(SI*4) VPSRLDQ $12, X3, X1 // lastValue ADDQ $8, SI CMPQ SI, DI JNE loopAVX2 VZEROUPPER MOVQ X1, DX JMP test loop: MOVL (AX)(SI*4), DI ADDL CX, DI ADDL DI, DX MOVL DX, (AX)(SI*4) INCQ SI test: CMPQ SI, BX JNE loop done: MOVL DX, ret+32(FP) RET // ----------------------------------------------------------------------------- // 64 bits // ----------------------------------------------------------------------------- #define deltaInt64AVX2x4(baseAddr) \ VMOVDQU baseAddr, Y1 \ // [0,1,2,3] VPERMQ $0b10010011, Y1, Y2 \ // [3,0,1,2] VPBLENDD $3, Y0, Y2, Y2 \ // [x,0,1,2] VPSUBQ Y2, Y1, Y2 \ // [0,1,2,3] - [x,0,1,2] VMOVDQU Y2, baseAddr \ VPERMQ $0b10010011, Y1, Y0 // func blockDeltaInt64AVX2(block *[blockSize]int64, lastValue int64) int64 TEXT ·blockDeltaInt64AVX2(SB), NOSPLIT, $0-24 MOVQ block+0(FP), AX MOVQ 8*blockSize-8(AX), CX MOVQ CX, ret+16(FP) VPBROADCASTQ lastValue+8(FP), Y0 XORQ SI, SI loop: deltaInt64AVX2x4((AX)(SI*8)) deltaInt64AVX2x4(32(AX)(SI*8)) deltaInt64AVX2x4(64(AX)(SI*8)) deltaInt64AVX2x4(96(AX)(SI*8)) ADDQ $16, SI CMPQ SI, $blockSize JNE loop VZEROUPPER RET // vpminsq is an emulation of the AVX-512 VPMINSQ instruction with AVX2. 
#define vpminsq(ones, tmp, arg2, arg1, ret) \ VPCMPGTQ arg1, arg2, tmp \ VPBLENDVB tmp, arg1, arg2, ret // func blockMinInt64AVX2(block *[blockSize]int64) int64 TEXT ·blockMinInt64AVX2(SB), NOSPLIT, $0-16 MOVQ block+0(FP), AX XORQ SI, SI VPCMPEQQ Y9, Y9, Y9 // ones VPBROADCASTQ (AX), Y0 loop: VMOVDQU 0(AX)(SI*8), Y1 VMOVDQU 32(AX)(SI*8), Y2 VMOVDQU 64(AX)(SI*8), Y3 VMOVDQU 96(AX)(SI*8), Y4 VMOVDQU 128(AX)(SI*8), Y5 VMOVDQU 160(AX)(SI*8), Y6 VMOVDQU 192(AX)(SI*8), Y7 VMOVDQU 224(AX)(SI*8), Y8 vpminsq(Y9, Y10, Y0, Y1, Y1) vpminsq(Y9, Y11, Y0, Y2, Y2) vpminsq(Y9, Y12, Y0, Y3, Y3) vpminsq(Y9, Y13, Y0, Y4, Y4) vpminsq(Y9, Y14, Y0, Y5, Y5) vpminsq(Y9, Y15, Y0, Y6, Y6) vpminsq(Y9, Y10, Y0, Y7, Y7) vpminsq(Y9, Y11, Y0, Y8, Y8) vpminsq(Y9, Y12, Y2, Y1, Y1) vpminsq(Y9, Y13, Y4, Y3, Y3) vpminsq(Y9, Y14, Y6, Y5, Y5) vpminsq(Y9, Y15, Y8, Y7, Y7) vpminsq(Y9, Y10, Y3, Y1, Y1) vpminsq(Y9, Y11, Y7, Y5, Y5) vpminsq(Y9, Y12, Y5, Y1, Y0) ADDQ $32, SI CMPQ SI, $blockSize JNE loop VPERM2I128 $1, Y0, Y0, Y1 vpminsq(Y9, Y10, Y1, Y0, Y0) MOVQ X0, CX VPEXTRQ $1, X0, BX CMPQ CX, BX CMOVQLT CX, BX MOVQ BX, ret+8(FP) VZEROUPPER RET #define subInt64AVX2x32(baseAddr, offset) \ VMOVDQU offset+0(baseAddr), Y1 \ VMOVDQU offset+32(baseAddr), Y2 \ VMOVDQU offset+64(baseAddr), Y3 \ VMOVDQU offset+96(baseAddr), Y4 \ VMOVDQU offset+128(baseAddr), Y5 \ VMOVDQU offset+160(baseAddr), Y6 \ VMOVDQU offset+192(baseAddr), Y7 \ VMOVDQU offset+224(baseAddr), Y8 \ VPSUBQ Y0, Y1, Y1 \ VPSUBQ Y0, Y2, Y2 \ VPSUBQ Y0, Y3, Y3 \ VPSUBQ Y0, Y4, Y4 \ VPSUBQ Y0, Y5, Y5 \ VPSUBQ Y0, Y6, Y6 \ VPSUBQ Y0, Y7, Y7 \ VPSUBQ Y0, Y8, Y8 \ VMOVDQU Y1, offset+0(baseAddr) \ VMOVDQU Y2, offset+32(baseAddr) \ VMOVDQU Y3, offset+64(baseAddr) \ VMOVDQU Y4, offset+96(baseAddr) \ VMOVDQU Y5, offset+128(baseAddr) \ VMOVDQU Y6, offset+160(baseAddr) \ VMOVDQU Y7, offset+192(baseAddr) \ VMOVDQU Y8, offset+224(baseAddr) // func blockSubInt64AVX2(block *[blockSize]int64, value int64) TEXT ·blockSubInt64AVX2(SB), NOSPLIT, $0-16 MOVQ block+0(FP), 
AX VPBROADCASTQ value+8(FP), Y0 subInt64AVX2x32(AX, 0) subInt64AVX2x32(AX, 256) subInt64AVX2x32(AX, 512) subInt64AVX2x32(AX, 768) VZEROUPPER RET // vpmaxsq is an emulation of the AVX-512 VPMAXSQ instruction with AVX2. #define vpmaxsq(tmp, arg2, arg1, ret) \ VPCMPGTQ arg2, arg1, tmp \ VPBLENDVB tmp, arg1, arg2, ret // func blockBitWidthsInt64AVX2(bitWidths *[numMiniBlocks]byte, block *[blockSize]int64) TEXT ·blockBitWidthsInt64AVX2(SB), NOSPLIT, $0-16 MOVQ bitWidths+0(FP), AX MOVQ block+8(FP), BX // AVX2 only has signed comparisons (and min/max), we emulate working on // unsigned values by adding -2^64 to the values. Y9 is a vector of -2^64 // used to offset 4 packed 64 bits integers in other YMM registers where // the block data are loaded. VPCMPEQQ Y9, Y9, Y9 VPSLLQ $63, Y9, Y9 XORQ DI, DI loop: VPBROADCASTQ (BX), Y0 // max VPADDQ Y9, Y0, Y0 VMOVDQU (BX), Y1 VMOVDQU 32(BX), Y2 VMOVDQU 64(BX), Y3 VMOVDQU 96(BX), Y4 VMOVDQU 128(BX), Y5 VMOVDQU 160(BX), Y6 VMOVDQU 192(BX), Y7 VMOVDQU 224(BX), Y8 VPADDQ Y9, Y1, Y1 VPADDQ Y9, Y2, Y2 VPADDQ Y9, Y3, Y3 VPADDQ Y9, Y4, Y4 VPADDQ Y9, Y5, Y5 VPADDQ Y9, Y6, Y6 VPADDQ Y9, Y7, Y7 VPADDQ Y9, Y8, Y8 vpmaxsq(Y10, Y2, Y1, Y1) vpmaxsq(Y11, Y4, Y3, Y3) vpmaxsq(Y12, Y6, Y5, Y5) vpmaxsq(Y13, Y8, Y7, Y7) vpmaxsq(Y10, Y3, Y1, Y1) vpmaxsq(Y11, Y7, Y5, Y5) vpmaxsq(Y12, Y5, Y1, Y1) vpmaxsq(Y13, Y1, Y0, Y0) VPERM2I128 $1, Y0, Y0, Y1 vpmaxsq(Y10, Y1, Y0, Y0) VPSUBQ Y9, Y0, Y0 MOVQ X0, CX VPEXTRQ $1, X0, DX CMPQ CX, DX CMOVQHI CX, DX LZCNTQ DX, DX NEGQ DX ADDQ $64, DX MOVB DX, (AX)(DI*1) ADDQ $256, BX INCQ DI CMPQ DI, $numMiniBlocks JNE loop VZEROUPPER RET // encodeMiniBlockInt64Default is the generic implementation of the algorithm to // pack 64 bit integers into values of a given bit width (<=64). // // This algorithm is much slower than the vectorized versions, but is useful // as a reference implementation to run the tests against, and as fallback when // the code runs on a CPU which does not support the AVX2 instruction set. 
// // func encodeMiniBlockInt64Default(dst *byte, src *[miniBlockSize]int64, bitWidth uint) TEXT ·encodeMiniBlockInt64Default(SB), NOSPLIT, $0-24 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX MOVQ bitWidth+16(FP), R10 XORQ R11, R11 // zero XORQ DI, DI // bitOffset XORQ SI, SI loop: MOVQ DI, CX MOVQ DI, DX ANDQ $0b111111, CX // bitOffset % 64 SHRQ $6, DX // bitOffset / 64 MOVQ (BX)(SI*8), R8 MOVQ R8, R9 SHLQ CX, R8 NEGQ CX ADDQ $64, CX SHRQ CX, R9 CMPQ CX, $64 CMOVQEQ R11, R9 // needed because shifting by more than 63 is undefined ORQ R8, 0(AX)(DX*8) ORQ R9, 8(AX)(DX*8) ADDQ R10, DI INCQ SI CMPQ SI, $miniBlockSize JNE loop RET // func encodeMiniBlockInt64x1bitAVX2(dst *byte, src *[miniBlockSize]int64) TEXT ·encodeMiniBlockInt64x1bitAVX2(SB), NOSPLIT, $0-16 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX VMOVDQU 0(BX), Y0 VMOVDQU 32(BX), Y1 VMOVDQU 64(BX), Y2 VMOVDQU 96(BX), Y3 VMOVDQU 128(BX), Y4 VMOVDQU 160(BX), Y5 VMOVDQU 192(BX), Y6 VMOVDQU 224(BX), Y7 VPSLLQ $63, Y0, Y0 VPSLLQ $63, Y1, Y1 VPSLLQ $63, Y2, Y2 VPSLLQ $63, Y3, Y3 VPSLLQ $63, Y4, Y4 VPSLLQ $63, Y5, Y5 VPSLLQ $63, Y6, Y6 VPSLLQ $63, Y7, Y7 VMOVMSKPD Y0, R8 VMOVMSKPD Y1, R9 VMOVMSKPD Y2, R10 VMOVMSKPD Y3, R11 VMOVMSKPD Y4, R12 VMOVMSKPD Y5, R13 VMOVMSKPD Y6, R14 VMOVMSKPD Y7, R15 SHLL $4, R9 SHLL $8, R10 SHLL $12, R11 SHLL $16, R12 SHLL $20, R13 SHLL $24, R14 SHLL $28, R15 ORL R9, R8 ORL R11, R10 ORL R13, R12 ORL R15, R14 ORL R10, R8 ORL R14, R12 ORL R12, R8 MOVL R8, (AX) VZEROUPPER RET // func encodeMiniBlockInt64x2bitsAVX2(dst *byte, src *[miniBlockSize]int64) TEXT ·encodeMiniBlockInt64x2bitsAVX2(SB), NOSPLIT, $0-16 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX VMOVDQU 0(BX), Y8 VMOVDQU 32(BX), Y9 VMOVDQU 64(BX), Y10 VMOVDQU 96(BX), Y11 VMOVDQU 128(BX), Y12 VMOVDQU 160(BX), Y13 VMOVDQU 192(BX), Y14 VMOVDQU 224(BX), Y15 VPSLLQ $63, Y8, Y0 VPSLLQ $63, Y9, Y1 VPSLLQ $63, Y10, Y2 VPSLLQ $63, Y11, Y3 VPSLLQ $63, Y12, Y4 VPSLLQ $63, Y13, Y5 VPSLLQ $63, Y14, Y6 VPSLLQ $63, Y15, Y7 VMOVMSKPD Y0, R8 VMOVMSKPD Y1, R9 VMOVMSKPD Y2, R10 
VMOVMSKPD Y3, R11 VMOVMSKPD Y4, R12 VMOVMSKPD Y5, R13 VMOVMSKPD Y6, R14 VMOVMSKPD Y7, R15 SHLQ $4, R9 SHLQ $8, R10 SHLQ $12, R11 SHLQ $16, R12 SHLQ $20, R13 SHLQ $24, R14 SHLQ $28, R15 ORQ R9, R8 ORQ R11, R10 ORQ R13, R12 ORQ R15, R14 ORQ R10, R8 ORQ R14, R12 ORQ R12, R8 MOVQ $0x5555555555555555, CX // 0b010101... PDEPQ CX, R8, CX VPSLLQ $62, Y8, Y8 VPSLLQ $62, Y9, Y9 VPSLLQ $62, Y10, Y10 VPSLLQ $62, Y11, Y11 VPSLLQ $62, Y12, Y12 VPSLLQ $62, Y13, Y13 VPSLLQ $62, Y14, Y14 VPSLLQ $62, Y15, Y15 VMOVMSKPD Y8, R8 VMOVMSKPD Y9, R9 VMOVMSKPD Y10, R10 VMOVMSKPD Y11, R11 VMOVMSKPD Y12, R12 VMOVMSKPD Y13, R13 VMOVMSKPD Y14, R14 VMOVMSKPD Y15, R15 SHLQ $4, R9 SHLQ $8, R10 SHLQ $12, R11 SHLQ $16, R12 SHLQ $20, R13 SHLQ $24, R14 SHLQ $28, R15 ORQ R9, R8 ORQ R11, R10 ORQ R13, R12 ORQ R15, R14 ORQ R10, R8 ORQ R14, R12 ORQ R12, R8 MOVQ $0xAAAAAAAAAAAAAAAA, DX // 0b101010... PDEPQ DX, R8, DX ORQ DX, CX MOVQ CX, (AX) VZEROUPPER RET // func encodeMiniBlockInt64x64bitsAVX2(dst *byte, src *[miniBlockSize]int64) TEXT ·encodeMiniBlockInt64x64bitsAVX2(SB), NOSPLIT, $0-16 MOVQ dst+0(FP), AX MOVQ src+8(FP), BX VMOVDQU 0(BX), Y0 VMOVDQU 32(BX), Y1 VMOVDQU 64(BX), Y2 VMOVDQU 96(BX), Y3 VMOVDQU 128(BX), Y4 VMOVDQU 160(BX), Y5 VMOVDQU 192(BX), Y6 VMOVDQU 224(BX), Y7 VMOVDQU Y0, 0(AX) VMOVDQU Y1, 32(AX) VMOVDQU Y2, 64(AX) VMOVDQU Y3, 96(AX) VMOVDQU Y4, 128(AX) VMOVDQU Y5, 160(AX) VMOVDQU Y6, 192(AX) VMOVDQU Y7, 224(AX) VZEROUPPER RET // func decodeBlockInt64Default(dst []int64, minDelta, lastValue int64) int64 TEXT ·decodeBlockInt64Default(SB), NOSPLIT, $0-48 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), BX MOVQ minDelta+24(FP), CX MOVQ lastValue+32(FP), DX XORQ SI, SI JMP test loop: MOVQ (AX)(SI*8), DI ADDQ CX, DI ADDQ DI, DX MOVQ DX, (AX)(SI*8) INCQ SI test: CMPQ SI, BX JNE loop done: MOVQ DX, ret+40(FP) RET ================================================ FILE: encoding/delta/binary_packed_amd64_test.go ================================================ //go:build amd64 && !purego package delta 
import ( "testing" "golang.org/x/sys/cpu" ) func requireAVX2(t testing.TB) { if !cpu.X86.HasAVX2 { t.Skip("CPU does not support AVX2") } } func TestBlockDeltaInt32AVX2(t *testing.T) { requireAVX2(t) testBlockDeltaInt32(t, blockDeltaInt32AVX2) } func TestBlockMinInt32AVX2(t *testing.T) { requireAVX2(t) testBlockMinInt32(t, blockMinInt32AVX2) } func TestBlockSubInt32AVX2(t *testing.T) { requireAVX2(t) testBlockSubInt32(t, blockSubInt32AVX2) } func TestBlockBitWidthsInt32AVX2(t *testing.T) { requireAVX2(t) testBlockBitWidthsInt32(t, blockBitWidthsInt32AVX2) } func TestEncodeMiniBlockInt32AVX2(t *testing.T) { requireAVX2(t) testEncodeMiniBlockInt32(t, func(dst []byte, src *[miniBlockSize]int32, bitWidth uint) { encodeMiniBlockInt32AVX2(&dst[0], src, bitWidth) }, ) } func BenchmarkBlockDeltaInt32AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockDeltaInt32(b, blockDeltaInt32AVX2) } func BenchmarkBlockMinInt32AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockMinInt32(b, blockMinInt32AVX2) } func BenchmarkBlockSubInt32AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockSubInt32(b, blockSubInt32AVX2) } func BenchmarkBlockBitWidthsInt32AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockBitWidthsInt32(b, blockBitWidthsInt32AVX2) } func BenchmarkEncodeMiniBlockInt32AVX2(b *testing.B) { requireAVX2(b) benchmarkEncodeMiniBlockInt32(b, func(dst []byte, src *[miniBlockSize]int32, bitWidth uint) { encodeMiniBlockInt32AVX2(&dst[0], src, bitWidth) }, ) } func TestBlockDeltaInt64AVX2(t *testing.T) { requireAVX2(t) testBlockDeltaInt64(t, blockDeltaInt64AVX2) } func TestBlockMinInt64AVX2(t *testing.T) { requireAVX2(t) testBlockMinInt64(t, blockMinInt64AVX2) } func TestBlockSubInt64AVX2(t *testing.T) { requireAVX2(t) testBlockSubInt64(t, blockSubInt64AVX2) } func TestBlockBitWidthsInt64AVX2(t *testing.T) { requireAVX2(t) testBlockBitWidthsInt64(t, blockBitWidthsInt64AVX2) } func TestEncodeMiniBlockInt64AVX2(t *testing.T) { requireAVX2(t) testEncodeMiniBlockInt64(t, func(dst []byte, src 
*[miniBlockSize]int64, bitWidth uint) { encodeMiniBlockInt64AVX2(&dst[0], src, bitWidth) }, ) } func BenchmarkBlockDeltaInt64AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockDeltaInt64(b, blockDeltaInt64AVX2) } func BenchmarkBlockMinInt64AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockMinInt64(b, blockMinInt64AVX2) } func BenchmarkBlockSubInt64AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockSubInt64(b, blockSubInt64AVX2) } func BenchmarkBlockBitWidthsInt64AVX2(b *testing.B) { requireAVX2(b) benchmarkBlockBitWidthsInt64(b, blockBitWidthsInt64AVX2) } func BenchmarkEncodeMiniBlockInt64AVX2(b *testing.B) { requireAVX2(b) benchmarkEncodeMiniBlockInt64(b, func(dst []byte, src *[miniBlockSize]int64, bitWidth uint) { encodeMiniBlockInt64AVX2(&dst[0], src, bitWidth) }, ) } ================================================ FILE: encoding/delta/binary_packed_purego.go ================================================ //go:build purego || !amd64 package delta import ( "encoding/binary" ) func encodeMiniBlockInt32(dst []byte, src *[miniBlockSize]int32, bitWidth uint) { bitMask := uint32(1<> (32 - j)) binary.LittleEndian.PutUint32(dst[(i+0)*4:], lo) binary.LittleEndian.PutUint32(dst[(i+1)*4:], hi) bitOffset += bitWidth } } func encodeMiniBlockInt64(dst []byte, src *[miniBlockSize]int64, bitWidth uint) { bitMask := uint64(1<> (64 - j)) binary.LittleEndian.PutUint64(dst[(i+0)*8:], lo) binary.LittleEndian.PutUint64(dst[(i+1)*8:], hi) bitOffset += bitWidth } } func decodeBlockInt32(block []int32, minDelta, lastValue int32) int32 { for i := range block { block[i] += minDelta block[i] += lastValue lastValue = block[i] } return lastValue } func decodeBlockInt64(block []int64, minDelta, lastValue int64) int64 { for i := range block { block[i] += minDelta block[i] += lastValue lastValue = block[i] } return lastValue } func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) { bitMask := uint32(1<> j if j+bitWidth > 32 { k := 32 - j d |= (src[i+1] & (bitMask >> k)) << k } 
dst[n] = int32(d) bitOffset += bitWidth } } func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) { bitMask := uint64(1<> j if j+bitWidth > 32 { k := 32 - j d |= (uint64(src[i+1]) & (bitMask >> k)) << k if j+bitWidth > 64 { k := 64 - j d |= (uint64(src[i+2]) & (bitMask >> k)) << k } } dst[n] = int64(d) bitOffset += bitWidth } } ================================================ FILE: encoding/delta/binary_packed_test.go ================================================ package delta import ( "bytes" "fmt" "math/bits" "testing" ) func maxLen32(miniBlock []int32) (maxLen int) { for _, v := range miniBlock { if n := bits.Len32(uint32(v)); n > maxLen { maxLen = n } } return maxLen } func maxLen64(miniBlock []int64) (maxLen int) { for _, v := range miniBlock { if n := bits.Len64(uint64(v)); n > maxLen { maxLen = n } } return maxLen } func TestBlockDeltaInt32(t *testing.T) { testBlockDeltaInt32(t, blockDeltaInt32) } func testBlockDeltaInt32(t *testing.T, f func(*[blockSize]int32, int32) int32) { t.Helper() block := [blockSize]int32{} for i := range block { block[i] = int32(2 * (i + 1)) } lastValue := f(&block, 0) if lastValue != 2*blockSize { t.Errorf("wrong last block value: want=%d got=%d", 2*blockSize, lastValue) } for i := range block { j := int32(2 * (i + 0)) k := int32(2 * (i + 1)) if block[i] != (k - j) { t.Errorf("wrong block delta at index %d: want=%d got=%d", i, k-j, block[i]) } } } func TestBlockMinInt32(t *testing.T) { testBlockMinInt32(t, blockMinInt32) } func testBlockMinInt32(t *testing.T, f func(*[blockSize]int32) int32) { t.Helper() block := [blockSize]int32{} for i := range block { block[i] = blockSize - int32(i) } if min := f(&block); min != 1 { t.Errorf("wrong min block value: want=1 got=%d", min) } } func TestBlockSubInt32(t *testing.T) { testBlockSubInt32(t, blockSubInt32) } func testBlockSubInt32(t *testing.T, f func(*[blockSize]int32, int32)) { t.Helper() block := [blockSize]int32{} for i := range block { block[i] = int32(i) } f(&block, 
1) for i := range block { if block[i] != int32(i-1) { t.Errorf("wrong block value at index %d: want=%d got=%d", i, i-1, block[i]) } } } func TestBlockBitWidthsInt32(t *testing.T) { testBlockBitWidthsInt32(t, blockBitWidthsInt32) } func testBlockBitWidthsInt32(t *testing.T, f func(*[numMiniBlocks]byte, *[blockSize]int32)) { t.Helper() bitWidths := [numMiniBlocks]byte{} block := [blockSize]int32{} for i := range block { block[i] = int32(i) } f(&bitWidths, &block) want := [numMiniBlocks]byte{} for i := range want { j := (i + 0) * miniBlockSize k := (i + 1) * miniBlockSize want[i] = byte(maxLen32(block[j:k])) } if bitWidths != want { t.Errorf("wrong bit widths: want=%d got=%d", want, bitWidths) } } func TestEncodeMiniBlockInt32(t *testing.T) { testEncodeMiniBlockInt32(t, encodeMiniBlockInt32) } func testEncodeMiniBlockInt32(t *testing.T, f func([]byte, *[miniBlockSize]int32, uint)) { t.Helper() for bitWidth := uint(1); bitWidth <= 32; bitWidth++ { t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { got := [4*miniBlockSize + 32]byte{} src := [miniBlockSize]int32{} for i := range src { src[i] = int32(i) & int32((1<> b) & 1) << y) bitOffset++ } } f(got[:], &src, bitWidth) n := (miniBlockSize * bitWidth) / 8 if !bytes.Equal(want[:n], got[:n]) { t.Errorf("output mismatch: want=%08x got=%08x", want[:n], got[:n]) } }) } } func BenchmarkBlockDeltaInt32(b *testing.B) { benchmarkBlockDeltaInt32(b, blockDeltaInt32) } func benchmarkBlockDeltaInt32(b *testing.B, f func(*[blockSize]int32, int32) int32) { b.SetBytes(4 * blockSize) block := [blockSize]int32{} for i := 0; i < b.N; i++ { _ = f(&block, 0) } } func BenchmarkBlockMinInt32(b *testing.B) { benchmarkBlockMinInt32(b, blockMinInt32) } func benchmarkBlockMinInt32(b *testing.B, f func(*[blockSize]int32) int32) { b.SetBytes(4 * blockSize) block := [blockSize]int32{} for i := 0; i < b.N; i++ { _ = f(&block) } } func BenchmarkBlockSubInt32(b *testing.B) { benchmarkBlockSubInt32(b, blockSubInt32) } func 
benchmarkBlockSubInt32(b *testing.B, f func(*[blockSize]int32, int32)) { b.SetBytes(4 * blockSize) block := [blockSize]int32{} for i := 0; i < b.N; i++ { f(&block, 42) } } func BenchmarkBlockBitWidthsInt32(b *testing.B) { benchmarkBlockBitWidthsInt32(b, blockBitWidthsInt32) } func benchmarkBlockBitWidthsInt32(b *testing.B, f func(*[numMiniBlocks]byte, *[blockSize]int32)) { b.SetBytes(4 * blockSize) bitWidths := [numMiniBlocks]byte{} block := [blockSize]int32{} for i := 0; i < b.N; i++ { f(&bitWidths, &block) } } func BenchmarkEncodeMiniBlockInt32(b *testing.B) { benchmarkEncodeMiniBlockInt32(b, encodeMiniBlockInt32) } func benchmarkEncodeMiniBlockInt32(b *testing.B, f func([]byte, *[miniBlockSize]int32, uint)) { for bitWidth := uint(1); bitWidth <= 32; bitWidth++ { b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) { b.SetBytes(4 * miniBlockSize) dst := [4*miniBlockSize + 32]byte{} src := [miniBlockSize]int32{} for i := 0; i < b.N; i++ { f(dst[:], &src, bitWidth) } }) } } func TestBlockDeltaInt64(t *testing.T) { testBlockDeltaInt64(t, blockDeltaInt64) } func testBlockDeltaInt64(t *testing.T, f func(*[blockSize]int64, int64) int64) { t.Helper() block := [blockSize]int64{} for i := range block { block[i] = int64(2 * (i + 1)) } lastValue := f(&block, 0) if lastValue != 2*blockSize { t.Errorf("wrong last block value: want=%d got=%d", 2*blockSize, lastValue) } for i := range block { j := int64(2 * (i + 0)) k := int64(2 * (i + 1)) if block[i] != (k - j) { t.Errorf("wrong block delta at index %d: want=%d got=%d", i, k-j, block[i]) } } } func TestBlockMinInt64(t *testing.T) { testBlockMinInt64(t, blockMinInt64) } func testBlockMinInt64(t *testing.T, f func(*[blockSize]int64) int64) { block := [blockSize]int64{} for i := range block { block[i] = blockSize - int64(i) } if min := f(&block); min != 1 { t.Errorf("wrong min block value: want=1 got=%d", min) } } func TestBlockSubInt64(t *testing.T) { testBlockSubInt64(t, blockSubInt64) } func testBlockSubInt64(t 
*testing.T, f func(*[blockSize]int64, int64)) { block := [blockSize]int64{} for i := range block { block[i] = int64(i) } f(&block, 1) for i := range block { if block[i] != int64(i-1) { t.Errorf("wrong block value at index %d: want=%d got=%d", i, i-1, block[i]) } } } func TestBlockBitWidthsInt64(t *testing.T) { testBlockBitWidthsInt64(t, blockBitWidthsInt64) } func testBlockBitWidthsInt64(t *testing.T, f func(*[numMiniBlocks]byte, *[blockSize]int64)) { bitWidths := [numMiniBlocks]byte{} block := [blockSize]int64{} for i := range block { block[i] = int64(i) } f(&bitWidths, &block) want := [numMiniBlocks]byte{} for i := range want { j := (i + 0) * miniBlockSize k := (i + 1) * miniBlockSize want[i] = byte(maxLen64(block[j:k])) } if bitWidths != want { t.Errorf("wrong bit widths: want=%d got=%d", want, bitWidths) } } func TestEncodeMiniBlockInt64(t *testing.T) { testEncodeMiniBlockInt64(t, encodeMiniBlockInt64) } func testEncodeMiniBlockInt64(t *testing.T, f func([]byte, *[miniBlockSize]int64, uint)) { for bitWidth := uint(1); bitWidth <= 64; bitWidth++ { t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { got := [8*miniBlockSize + 64]byte{} src := [miniBlockSize]int64{} for i := range src { src[i] = int64(i) & int64((1<> b) & 1) << y) bitOffset++ } } f(got[:], &src, bitWidth) n := (miniBlockSize * bitWidth) / 8 if !bytes.Equal(want[:n], got[:n]) { t.Errorf("output mismatch: want=%08x got=%08x", want[:n], got[:n]) } }) } } func BenchmarkBlockDeltaInt64(b *testing.B) { benchmarkBlockDeltaInt64(b, blockDeltaInt64) } func benchmarkBlockDeltaInt64(b *testing.B, f func(*[blockSize]int64, int64) int64) { b.SetBytes(8 * blockSize) block := [blockSize]int64{} for i := 0; i < b.N; i++ { _ = f(&block, 0) } } func BenchmarkBlockMinInt64(b *testing.B) { benchmarkBlockMinInt64(b, blockMinInt64) } func benchmarkBlockMinInt64(b *testing.B, f func(*[blockSize]int64) int64) { b.SetBytes(8 * blockSize) block := [blockSize]int64{} for i := 0; i < b.N; i++ { _ = f(&block) } } 
func BenchmarkBlockSubInt64(b *testing.B) { benchmarkBlockSubInt64(b, blockSubInt64) } func benchmarkBlockSubInt64(b *testing.B, f func(*[blockSize]int64, int64)) { b.SetBytes(8 * blockSize) block := [blockSize]int64{} for i := 0; i < b.N; i++ { f(&block, 42) } } func BenchmarkBlockBitWidthsInt64(b *testing.B) { benchmarkBlockBitWidthsInt64(b, blockBitWidthsInt64) } func benchmarkBlockBitWidthsInt64(b *testing.B, f func(*[numMiniBlocks]byte, *[blockSize]int64)) { b.SetBytes(8 * blockSize) bitWidths := [numMiniBlocks]byte{} block := [blockSize]int64{} for i := 0; i < b.N; i++ { f(&bitWidths, &block) } } func BenchmarkEncodeMiniBlockInt64(b *testing.B) { benchmarkEncodeMiniBlockInt64(b, encodeMiniBlockInt64) } func benchmarkEncodeMiniBlockInt64(b *testing.B, f func([]byte, *[miniBlockSize]int64, uint)) { for bitWidth := uint(1); bitWidth <= 64; bitWidth++ { b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) { b.SetBytes(8 * miniBlockSize) dst := [8*miniBlockSize + 64]byte{} src := [miniBlockSize]int64{} for i := 0; i < b.N; i++ { f(dst[:], &src, bitWidth) } }) } } ================================================ FILE: encoding/delta/byte_array.go ================================================ package delta import ( "bytes" "sort" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" ) const ( maxLinearSearchPrefixLength = 64 // arbitrary ) type ByteArrayEncoding struct { encoding.NotSupported } func (e *ByteArrayEncoding) String() string { return "DELTA_BYTE_ARRAY" } func (e *ByteArrayEncoding) Encoding() format.Encoding { return format.DeltaByteArray } func (e *ByteArrayEncoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { prefix := getInt32Buffer() defer putInt32Buffer(prefix) length := getInt32Buffer() defer putInt32Buffer(length) totalSize := 0 if len(offsets) > 0 { lastValue := ([]byte)(nil) baseOffset := offsets[0] for _, endOffset := range offsets[1:] { v := 
src[baseOffset:endOffset:endOffset] n := int(endOffset - baseOffset) p := 0 baseOffset = endOffset if len(v) <= maxLinearSearchPrefixLength { p = linearSearchPrefixLength(lastValue, v) } else { p = binarySearchPrefixLength(lastValue, v) } prefix.values = append(prefix.values, int32(p)) length.values = append(length.values, int32(n-p)) lastValue = v totalSize += n - p } } dst = dst[:0] dst = encodeInt32(dst, prefix.values) dst = encodeInt32(dst, length.values) dst = resize(dst, len(dst)+totalSize) if len(offsets) > 0 { b := dst[len(dst)-totalSize:] i := int(offsets[0]) j := 0 _ = length.values[:len(prefix.values)] for k, p := range prefix.values { n := p + length.values[k] j += copy(b[j:], src[i+int(p):i+int(n)]) i += int(n) } } return dst, nil } func (e *ByteArrayEncoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { // The parquet specs say that this encoding is only supported for BYTE_ARRAY // values, but the reference Java implementation appears to support // FIXED_LEN_BYTE_ARRAY as well: // https://github.com/apache/parquet-mr/blob/5608695f5777de1eb0899d9075ec9411cfdf31d3/parquet-column/src/main/java/org/apache/parquet/column/Encoding.java#L211 if size < 0 || size > encoding.MaxFixedLenByteArraySize { return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument) } if (len(src) % size) != 0 { return dst[:0], encoding.ErrEncodeInvalidInputSize(e, "FIXED_LEN_BYTE_ARRAY", len(src)) } prefix := getInt32Buffer() defer putInt32Buffer(prefix) length := getInt32Buffer() defer putInt32Buffer(length) totalSize := 0 lastValue := ([]byte)(nil) for i := size; i <= len(src); i += size { v := src[i-size : i : i] p := linearSearchPrefixLength(lastValue, v) n := size - p prefix.values = append(prefix.values, int32(p)) length.values = append(length.values, int32(n)) lastValue = v totalSize += n } dst = dst[:0] dst = encodeInt32(dst, prefix.values) dst = encodeInt32(dst, length.values) dst = resize(dst, len(dst)+totalSize) b := 
dst[len(dst)-totalSize:] i := 0 j := 0 for _, p := range prefix.values { j += copy(b[j:], src[i+int(p):i+size]) i += size } return dst, nil } func (e *ByteArrayEncoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) { dst, offsets = dst[:0], offsets[:0] prefix := getInt32Buffer() defer putInt32Buffer(prefix) suffix := getInt32Buffer() defer putInt32Buffer(suffix) var err error src, err = prefix.decode(src) if err != nil { return dst, offsets, e.wrapf("decoding prefix lengths: %w", err) } src, err = suffix.decode(src) if err != nil { return dst, offsets, e.wrapf("decoding suffix lengths: %w", err) } if len(prefix.values) != len(suffix.values) { return dst, offsets, e.wrap(errPrefixAndSuffixLengthMismatch(len(prefix.values), len(suffix.values))) } return decodeByteArray(dst, src, prefix.values, suffix.values, offsets) } func (e *ByteArrayEncoding) DecodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { dst = dst[:0] if size < 0 || size > encoding.MaxFixedLenByteArraySize { return dst, e.wrap(encoding.ErrInvalidArgument) } prefix := getInt32Buffer() defer putInt32Buffer(prefix) suffix := getInt32Buffer() defer putInt32Buffer(suffix) var err error src, err = prefix.decode(src) if err != nil { return dst, e.wrapf("decoding prefix lengths: %w", err) } src, err = suffix.decode(src) if err != nil { return dst, e.wrapf("decoding suffix lengths: %w", err) } if len(prefix.values) != len(suffix.values) { return dst, e.wrap(errPrefixAndSuffixLengthMismatch(len(prefix.values), len(suffix.values))) } return decodeFixedLenByteArray(dst[:0], src, size, prefix.values, suffix.values) } func (e *ByteArrayEncoding) EstimateDecodeByteArraySize(src []byte) int { length := getInt32Buffer() defer putInt32Buffer(length) src, _ = length.decode(src) sum := int(length.sum()) length.decode(src) return sum + int(length.sum()) } func (e *ByteArrayEncoding) wrap(err error) error { if err != nil { err = encoding.Error(e, err) } return err } 
// wrapf formats an error and tags it with this encoding's identity so callers
// can tell which encoding produced the failure.
func (e *ByteArrayEncoding) wrapf(msg string, args ...interface{}) error {
	return encoding.Errorf(e, msg, args...)
}

// linearSearchPrefixLength returns the length of the common prefix shared by
// base and data, scanning one byte at a time. Used for short values (up to
// maxLinearSearchPrefixLength) where a linear scan beats binary search.
func linearSearchPrefixLength(base, data []byte) (n int) {
	for n < len(base) && n < len(data) && base[n] == data[n] {
		n++
	}
	return n
}

// binarySearchPrefixLength returns the length of the common prefix shared by
// base and data using sort.Search over candidate prefix lengths; chosen for
// longer values by EncodeByteArray.
func binarySearchPrefixLength(base, data []byte) int {
	n := len(base)
	if n > len(data) {
		n = len(data)
	}
	return sort.Search(n, func(i int) bool {
		return !bytes.Equal(base[:i+1], data[:i+1])
	})
}

================================================
FILE: encoding/delta/byte_array_amd64.go
================================================
//go:build !purego

package delta

import (
	"golang.org/x/sys/cpu"
)

//go:noescape
func validatePrefixAndSuffixLengthValuesAVX2(prefix, suffix []int32, maxLength int) (totalPrefixLength, totalSuffixLength int, ok bool)

// validatePrefixAndSuffixLengthValues verifies that every prefix/suffix length
// pair is non-negative, that no prefix is longer than the previous value, and
// that the total suffix length fits within maxLength; it also accumulates the
// total prefix and suffix byte counts. The AVX2 fast path is attempted first;
// when it reports a violation (ok == false) the scalar loop re-runs to produce
// a precise error value.
func validatePrefixAndSuffixLengthValues(prefix, suffix []int32, maxLength int) (totalPrefixLength, totalSuffixLength int, err error) {
	if cpu.X86.HasAVX2 {
		totalPrefixLength, totalSuffixLength, ok := validatePrefixAndSuffixLengthValuesAVX2(prefix, suffix, maxLength)
		if ok {
			return totalPrefixLength, totalSuffixLength, nil
		}
	}
	lastValueLength := 0
	for i := range prefix {
		p := int(prefix[i])
		n := int(suffix[i])
		if p < 0 {
			err = errInvalidNegativePrefixLength(p)
			return
		}
		if n < 0 {
			err = errInvalidNegativeValueLength(n)
			return
		}
		if p > lastValueLength {
			// A prefix cannot reference more bytes than the previous value had.
			err = errPrefixLengthOutOfBounds(p, lastValueLength)
			return
		}
		totalPrefixLength += p
		totalSuffixLength += n
		lastValueLength = p + n
	}
	if totalSuffixLength > maxLength {
		err = errValueLengthOutOfBounds(totalSuffixLength, maxLength)
		return
	}
	return totalPrefixLength, totalSuffixLength, nil
}

//go:noescape
func decodeByteArrayOffsets(offsets []uint32, prefix, suffix []int32)

//go:noescape
func decodeByteArrayAVX2(dst, src []byte, prefix, suffix []int32) int

func decodeByteArray(dst, src []byte, prefix, suffix []int32, offsets []uint32) ([]byte, []uint32, error) {
	totalPrefixLength, totalSuffixLength, err :=
validatePrefixAndSuffixLengthValues(prefix, suffix, len(src)) if err != nil { return dst, offsets, err } totalLength := totalPrefixLength + totalSuffixLength dst = resizeNoMemclr(dst, totalLength+padding) if size := len(prefix) + 1; cap(offsets) < size { offsets = make([]uint32, size) } else { offsets = offsets[:size] } _ = prefix[:len(suffix)] _ = suffix[:len(prefix)] decodeByteArrayOffsets(offsets, prefix, suffix) var lastValue []byte var i int var j int if cpu.X86.HasAVX2 && len(src) > padding { k := len(suffix) n := 0 for k > 0 && n < padding { k-- n += int(suffix[k]) } if k > 0 && n >= padding { i = decodeByteArrayAVX2(dst, src, prefix[:k], suffix[:k]) j = len(src) - n lastValue = dst[i-(int(prefix[k-1])+int(suffix[k-1])):] prefix = prefix[k:] suffix = suffix[k:] } } for k := range prefix { p := int(prefix[k]) n := int(suffix[k]) lastValueOffset := i i += copy(dst[i:], lastValue[:p]) i += copy(dst[i:], src[j:j+n]) j += n lastValue = dst[lastValueOffset:] } return dst[:totalLength], offsets, nil } //go:noescape func decodeByteArrayAVX2x128bits(dst, src []byte, prefix, suffix []int32) int func decodeFixedLenByteArray(dst, src []byte, size int, prefix, suffix []int32) ([]byte, error) { totalPrefixLength, totalSuffixLength, err := validatePrefixAndSuffixLengthValues(prefix, suffix, len(src)) if err != nil { return dst, err } totalLength := totalPrefixLength + totalSuffixLength dst = resizeNoMemclr(dst, totalLength+padding) _ = prefix[:len(suffix)] _ = suffix[:len(prefix)] var lastValue []byte var i int var j int if cpu.X86.HasAVX2 && len(src) > padding { k := len(suffix) n := 0 for k > 0 && n < padding { k-- n += int(suffix[k]) } if k > 0 && n >= padding { if size == 16 { i = decodeByteArrayAVX2x128bits(dst, src, prefix[:k], suffix[:k]) } else { i = decodeByteArrayAVX2(dst, src, prefix[:k], suffix[:k]) } j = len(src) - n prefix = prefix[k:] suffix = suffix[k:] if i >= size { lastValue = dst[i-size:] } } } for k := range prefix { p := int(prefix[k]) n := 
int(suffix[k]) k := i i += copy(dst[i:], lastValue[:p]) i += copy(dst[i:], src[j:j+n]) j += n lastValue = dst[k:] } return dst[:totalLength], nil } ================================================ FILE: encoding/delta/byte_array_amd64.s ================================================ //go:build !purego #include "funcdata.h" #include "textflag.h" // func validatePrefixAndSuffixLengthValuesAVX2(prefix, suffix []int32, maxLength int) (totalPrefixLength, totalSuffixLength int, ok bool) TEXT ·validatePrefixAndSuffixLengthValuesAVX2(SB), NOSPLIT, $0-73 MOVQ prefix_base+0(FP), AX MOVQ suffix_base+24(FP), BX MOVQ suffix_len+32(FP), CX MOVQ maxLength+48(FP), DX XORQ SI, SI XORQ DI, DI // lastValueLength XORQ R8, R8 XORQ R9, R9 XORQ R10, R10 // totalPrefixLength XORQ R11, R11 // totalSuffixLength XORQ R12, R12 // ok CMPQ CX, $8 JB test MOVQ CX, R13 SHRQ $3, R13 SHLQ $3, R13 VPXOR X0, X0, X0 // lastValueLengths VPXOR X1, X1, X1 // totalPrefixLengths VPXOR X2, X2, X2 // totalSuffixLengths VPXOR X3, X3, X3 // negative prefix length sentinels VPXOR X4, X4, X4 // negative suffix length sentinels VPXOR X5, X5, X5 // prefix length overflow sentinels VMOVDQU ·rotateLeft32(SB), Y6 loopAVX2: VMOVDQU (AX)(SI*4), Y7 // p VMOVDQU (BX)(SI*4), Y8 // n VPADDD Y7, Y1, Y1 VPADDD Y8, Y2, Y2 VPOR Y7, Y3, Y3 VPOR Y8, Y4, Y4 VPADDD Y7, Y8, Y9 // p + n VPERMD Y0, Y6, Y10 VPBLENDD $1, Y10, Y9, Y10 VPCMPGTD Y10, Y7, Y10 VPOR Y10, Y5, Y5 VMOVDQU Y9, Y0 ADDQ $8, SI CMPQ SI, R13 JNE loopAVX2 // If any of the sentinel values has its most significant bit set then one // of the values was negative or one of the prefixes was greater than the // length of the previous value, return false. VPOR Y4, Y3, Y3 VPOR Y5, Y3, Y3 VMOVMSKPS Y3, R13 CMPQ R13, $0 JNE done // We computed 8 sums in parallel for the prefix and suffix arrays, they // need to be accumulated into single values, which is what these reduction // steps do. 
VPSRLDQ $4, Y1, Y5 VPSRLDQ $8, Y1, Y6 VPSRLDQ $12, Y1, Y7 VPADDD Y5, Y1, Y1 VPADDD Y6, Y1, Y1 VPADDD Y7, Y1, Y1 VPERM2I128 $1, Y1, Y1, Y0 VPADDD Y0, Y1, Y1 MOVQ X1, R10 ANDQ $0x7FFFFFFF, R10 VPSRLDQ $4, Y2, Y5 VPSRLDQ $8, Y2, Y6 VPSRLDQ $12, Y2, Y7 VPADDD Y5, Y2, Y2 VPADDD Y6, Y2, Y2 VPADDD Y7, Y2, Y2 VPERM2I128 $1, Y2, Y2, Y0 VPADDD Y0, Y2, Y2 MOVQ X2, R11 ANDQ $0x7FFFFFFF, R11 JMP test loop: MOVLQSX (AX)(SI*4), R8 MOVLQSX (BX)(SI*4), R9 CMPQ R8, $0 // p < 0 ? JL done CMPQ R9, $0 // n < 0 ? JL done CMPQ R8, DI // p > lastValueLength ? JG done ADDQ R8, R10 ADDQ R9, R11 ADDQ R8, DI ADDQ R9, DI INCQ SI test: CMPQ SI, CX JNE loop CMPQ R11, DX // totalSuffixLength > maxLength ? JG done MOVB $1, R12 done: MOVQ R10, totalPrefixLength+56(FP) MOVQ R11, totalSuffixLength+64(FP) MOVB R12, ok+72(FP) RET // func decodeByteArrayOffsets(offsets []uint32, prefix, suffix []int32) TEXT ·decodeByteArrayOffsets(SB), NOSPLIT, $0-72 MOVQ offsets_base+0(FP), AX MOVQ prefix_base+24(FP), BX MOVQ suffix_base+48(FP), CX MOVQ suffix_len+56(FP), DX XORQ SI, SI XORQ R10, R10 JMP test loop: MOVL (BX)(SI*4), R8 MOVL (CX)(SI*4), R9 MOVL R10, (AX)(SI*4) ADDL R8, R10 ADDL R9, R10 INCQ SI test: CMPQ SI, DX JNE loop MOVL R10, (AX)(SI*4) RET // func decodeByteArrayAVX2(dst, src []byte, prefix, suffix []int32) int TEXT ·decodeByteArrayAVX2(SB), NOSPLIT, $0-104 MOVQ dst_base+0(FP), AX MOVQ src_base+24(FP), BX MOVQ prefix_base+48(FP), CX MOVQ suffix_base+72(FP), DX MOVQ suffix_len+80(FP), DI XORQ SI, SI XORQ R8, R8 XORQ R9, R9 MOVQ AX, R10 // last value JMP test loop: MOVLQZX (CX)(SI*4), R8 // prefix length MOVLQZX (DX)(SI*4), R9 // suffix length prefix: VMOVDQU (R10), Y0 VMOVDQU Y0, (AX) CMPQ R8, $32 JA copyPrefix suffix: VMOVDQU (BX), Y1 VMOVDQU Y1, (AX)(R8*1) CMPQ R9, $32 JA copySuffix next: MOVQ AX, R10 ADDQ R9, R8 LEAQ (AX)(R8*1), AX LEAQ (BX)(R9*1), BX INCQ SI test: CMPQ SI, DI JNE loop MOVQ dst_base+0(FP), BX SUBQ BX, AX MOVQ AX, ret+96(FP) VZEROUPPER RET copyPrefix: MOVQ $32, R12 copyPrefixLoop: 
VMOVDQU (R10)(R12*1), Y0 VMOVDQU Y0, (AX)(R12*1) ADDQ $32, R12 CMPQ R12, R8 JB copyPrefixLoop JMP suffix copySuffix: MOVQ $32, R12 LEAQ (AX)(R8*1), R13 copySuffixLoop: VMOVDQU (BX)(R12*1), Y1 VMOVDQU Y1, (R13)(R12*1) ADDQ $32, R12 CMPQ R12, R9 JB copySuffixLoop JMP next // func decodeByteArrayAVX2x128bits(dst, src []byte, prefix, suffix []int32) int TEXT ·decodeByteArrayAVX2x128bits(SB), NOSPLIT, $0-104 MOVQ dst_base+0(FP), AX MOVQ src_base+24(FP), BX MOVQ prefix_base+48(FP), CX MOVQ suffix_base+72(FP), DX MOVQ suffix_len+80(FP), DI XORQ SI, SI XORQ R8, R8 XORQ R9, R9 VPXOR X0, X0, X0 JMP test loop: MOVLQZX (CX)(SI*4), R8 // prefix length MOVLQZX (DX)(SI*4), R9 // suffix length VMOVDQU (BX), X1 VMOVDQU X0, (AX) VMOVDQU X1, (AX)(R8*1) VMOVDQU (AX), X0 ADDQ R9, R8 LEAQ (AX)(R8*1), AX LEAQ (BX)(R9*1), BX INCQ SI test: CMPQ SI, DI JNE loop MOVQ dst_base+0(FP), BX SUBQ BX, AX MOVQ AX, ret+96(FP) VZEROUPPER RET ================================================ FILE: encoding/delta/byte_array_purego.go ================================================ //go:build purego || !amd64 package delta func decodeByteArray(dst, src []byte, prefix, suffix []int32, offsets []uint32) ([]byte, []uint32, error) { _ = prefix[:len(suffix)] _ = suffix[:len(prefix)] var lastValue []byte for i := range suffix { n := int(suffix[i]) p := int(prefix[i]) if n < 0 { return dst, offsets, errInvalidNegativeValueLength(n) } if n > len(src) { return dst, offsets, errValueLengthOutOfBounds(n, len(src)) } if p < 0 { return dst, offsets, errInvalidNegativePrefixLength(p) } if p > len(lastValue) { return dst, offsets, errPrefixLengthOutOfBounds(p, len(lastValue)) } j := len(dst) offsets = append(offsets, uint32(j)) dst = append(dst, lastValue[:p]...) dst = append(dst, src[:n]...) 
lastValue = dst[j:] src = src[n:] } return dst, append(offsets, uint32(len(dst))), nil } func decodeFixedLenByteArray(dst, src []byte, size int, prefix, suffix []int32) ([]byte, error) { _ = prefix[:len(suffix)] _ = suffix[:len(prefix)] var lastValue []byte for i := range suffix { n := int(suffix[i]) p := int(prefix[i]) if n < 0 { return dst, errInvalidNegativeValueLength(n) } if n > len(src) { return dst, errValueLengthOutOfBounds(n, len(src)) } if p < 0 { return dst, errInvalidNegativePrefixLength(p) } if p > len(lastValue) { return dst, errPrefixLengthOutOfBounds(p, len(lastValue)) } j := len(dst) dst = append(dst, lastValue[:p]...) dst = append(dst, src[:n]...) lastValue = dst[j:] src = src[n:] } return dst, nil } ================================================ FILE: encoding/delta/byte_array_test.go ================================================ package delta import ( "bytes" "fmt" "testing" ) func TestLinearSearchPrefixLength(t *testing.T) { testSearchPrefixLength(t, linearSearchPrefixLength) } func TestBinarySearchPrefixLength(t *testing.T) { testSearchPrefixLength(t, func(base, data []byte) int { return binarySearchPrefixLength(base, data) }) } func testSearchPrefixLength(t *testing.T, prefixLength func(base, data []byte) int) { tests := []struct { base string data string len int }{ { base: "", data: "", len: 0, }, { base: "A", data: "B", len: 0, }, { base: "", data: "Hello World!", len: 0, }, { base: "H", data: "Hello World!", len: 1, }, { base: "He", data: "Hello World!", len: 2, }, { base: "Hel", data: "Hello World!", len: 3, }, { base: "Hell", data: "Hello World!", len: 4, }, { base: "Hello", data: "Hello World!", len: 5, }, { base: "Hello ", data: "Hello World!", len: 6, }, { base: "Hello W", data: "Hello World!", len: 7, }, { base: "Hello Wo", data: "Hello World!", len: 8, }, { base: "Hello Wor", data: "Hello World!", len: 9, }, { base: "Hello Worl", data: "Hello World!", len: 10, }, { base: "Hello World", data: "Hello World!", len: 11, }, { base: 
"Hello World!", data: "Hello World!", len: 12, }, {
			base: "Hell.......",
			data: "Hello World!",
			len:  4,
		},
	}
	for _, test := range tests {
		t.Run("", func(t *testing.T) {
			n := prefixLength([]byte(test.base), []byte(test.data))
			if n != test.len {
				t.Errorf("prefixLength(%q,%q): want=%d got=%d", test.base, test.data, test.len, n)
			}
		})
	}
}

func BenchmarkLinearSearchPrefixLength(b *testing.B) {
	benchmarkSearchPrefixLength(b, linearSearchPrefixLength)
}

func BenchmarkBinarySearchPrefixLength(b *testing.B) {
	benchmarkSearchPrefixLength(b, func(base, data []byte) int {
		return binarySearchPrefixLength(base, data)
	})
}

// benchmarkSearchPrefixLength measures prefixLength on bases of 10/100/1000
// bytes, with data half the base size so the scan always runs to the end.
func benchmarkSearchPrefixLength(b *testing.B, prefixLength func(base, data []byte) int) {
	buffer := bytes.Repeat([]byte("0123456789"), 100)
	for _, size := range []int{10, 100, 1000} {
		b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
			base := buffer[:size]
			data := buffer[:size/2]
			for i := 0; i < b.N; i++ {
				_ = prefixLength(base, data)
			}
		})
	}
}

================================================
FILE: encoding/delta/delta.go
================================================
package delta

import (
	"fmt"
	"sync"

	"github.com/segmentio/parquet-go/internal/unsafecast"
)

// int32Buffer is a reusable scratch buffer of int32 values, pooled via
// int32BufferPool to avoid reallocating in encode/decode hot paths.
type int32Buffer struct {
	values []int32
}

// resize sets the buffer length to size, reallocating with 2x capacity when
// the current backing array is too small.
func (buf *int32Buffer) resize(size int) {
	if cap(buf.values) < size {
		buf.values = make([]int32, size, 2*size)
	} else {
		buf.values = buf.values[:size]
	}
}

// decode reads a DELTA_BINARY_PACKED int32 sequence from src into the buffer
// (reusing its storage via unsafe byte reinterpretation) and returns the
// unconsumed remainder of src.
func (buf *int32Buffer) decode(src []byte) ([]byte, error) {
	values, remain, err := decodeInt32(unsafecast.Int32ToBytes(buf.values[:0]), src)
	buf.values = unsafecast.BytesToInt32(values)
	return remain, err
}

// sum returns the sum of all buffered values (note: may overflow int32 for
// very large inputs; used for size estimation).
func (buf *int32Buffer) sum() (sum int32) {
	for _, v := range buf.values {
		sum += v
	}
	return sum
}

var (
	int32BufferPool sync.Pool // *int32Buffer
)

// getInt32Buffer returns a pooled buffer reset to zero length, allocating a
// fresh 1024-capacity buffer when the pool is empty.
func getInt32Buffer() *int32Buffer {
	b, _ := int32BufferPool.Get().(*int32Buffer)
	if b != nil {
		b.values = b.values[:0]
	} else {
		b = &int32Buffer{
			values: make([]int32, 0, 1024),
		}
	}
	return b
}

// putInt32Buffer returns b to the pool for reuse.
func putInt32Buffer(b *int32Buffer) {
	int32BufferPool.Put(b)
}
// resizeNoMemclr grows buf to size without zero-initializing any newly exposed
// bytes; callers are expected to overwrite the full buffer themselves.
func resizeNoMemclr(buf []byte, size int) []byte {
	if cap(buf) < size {
		return grow(buf, size)
	}
	return buf[:size]
}

// resize grows buf to size, zeroing any bytes exposed between the previous
// length and the new size.
func resize(buf []byte, size int) []byte {
	if cap(buf) < size {
		return grow(buf, size)
	}
	if size > len(buf) {
		clear := buf[len(buf):size]
		for i := range clear {
			clear[i] = 0
		}
	}
	return buf[:size]
}

// grow allocates a new buffer of the requested size, at least doubling the
// previous capacity to amortize repeated growth, and copies buf into it.
func grow(buf []byte, size int) []byte {
	newCap := 2 * cap(buf)
	if newCap < size {
		newCap = size
	}
	newBuf := make([]byte, size, newCap)
	copy(newBuf, buf)
	return newBuf
}

// min returns the smaller of two ints (helper predating the Go 1.21 builtin).
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// Error constructors shared by the DELTA_BYTE_ARRAY and
// DELTA_LENGTH_BYTE_ARRAY decoders; each captures the offending values so the
// wrapped message pinpoints the corruption.

func errPrefixAndSuffixLengthMismatch(prefixLength, suffixLength int) error {
	return fmt.Errorf("length of prefix and suffix mismatch: %d != %d", prefixLength, suffixLength)
}

func errInvalidNegativeValueLength(length int) error {
	return fmt.Errorf("invalid negative value length: %d", length)
}

func errInvalidNegativePrefixLength(length int) error {
	return fmt.Errorf("invalid negative prefix length: %d", length)
}

func errValueLengthOutOfBounds(length, maxLength int) error {
	return fmt.Errorf("value length is larger than the input size: %d > %d", length, maxLength)
}

func errPrefixLengthOutOfBounds(length, maxLength int) error {
	return fmt.Errorf("prefix length %d is larger than the last value of size %d", length, maxLength)
}

================================================
FILE: encoding/delta/delta_amd64.go
================================================
//go:build !purego

package delta

const (
	padding = 64
)

// findNegativeLength returns the first negative length found in lengths, or -1
// when all lengths are non-negative.
func findNegativeLength(lengths []int32) int {
	for _, n := range lengths {
		if n < 0 {
			return int(n)
		}
	}
	return -1
}

================================================
FILE: encoding/delta/delta_amd64.s
================================================
//go:build !purego

#include "textflag.h"

GLOBL ·rotateLeft32(SB), RODATA|NOPTR, $32
DATA ·rotateLeft32+0(SB)/4, $7
DATA ·rotateLeft32+4(SB)/4, $0
DATA ·rotateLeft32+8(SB)/4, $1
DATA ·rotateLeft32+12(SB)/4, $2
DATA ·rotateLeft32+16(SB)/4, $3
DATA ·rotateLeft32+20(SB)/4, $4
DATA
·rotateLeft32+24(SB)/4, $5 DATA ·rotateLeft32+28(SB)/4, $6 ================================================ FILE: encoding/delta/delta_test.go ================================================ //go:build go1.18 // +build go1.18 package delta_test import ( "fmt" "testing" "github.com/segmentio/parquet-go/encoding/delta" "github.com/segmentio/parquet-go/encoding/fuzz" "github.com/segmentio/parquet-go/encoding/test" ) func FuzzDeltaBinaryPackedInt32(f *testing.F) { fuzz.EncodeInt32(f, new(delta.BinaryPackedEncoding)) } func FuzzDeltaBinaryPackedInt64(f *testing.F) { fuzz.EncodeInt64(f, new(delta.BinaryPackedEncoding)) } func FuzzDeltaLengthByteArray(f *testing.F) { fuzz.EncodeByteArray(f, new(delta.LengthByteArrayEncoding)) } func FuzzDeltaByteArray(f *testing.F) { fuzz.EncodeByteArray(f, new(delta.ByteArrayEncoding)) } const ( encodeMinNumValues = 0 encodeMaxNumValues = 200 ) func TestEncodeInt32(t *testing.T) { for bitWidth := uint(0); bitWidth <= 32; bitWidth++ { t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { test.EncodeInt32(t, new(delta.BinaryPackedEncoding), encodeMinNumValues, encodeMaxNumValues, bitWidth, ) }) } } func TestEncodeInt64(t *testing.T) { for bitWidth := uint(0); bitWidth <= 64; bitWidth++ { t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { test.EncodeInt64(t, new(delta.BinaryPackedEncoding), encodeMinNumValues, encodeMaxNumValues, bitWidth, ) }) } } ================================================ FILE: encoding/delta/length_byte_array.go ================================================ package delta import ( "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" ) type LengthByteArrayEncoding struct { encoding.NotSupported } func (e *LengthByteArrayEncoding) String() string { return "DELTA_LENGTH_BYTE_ARRAY" } func (e *LengthByteArrayEncoding) Encoding() format.Encoding { return format.DeltaLengthByteArray } func (e *LengthByteArrayEncoding) EncodeByteArray(dst []byte, src []byte, 
offsets []uint32) ([]byte, error) {
	if len(offsets) == 0 {
		return dst[:0], nil
	}
	length := getInt32Buffer()
	defer putInt32Buffer(length)
	// Derive per-value lengths from consecutive offsets, then emit the
	// DELTA_BINARY_PACKED lengths followed by the raw value bytes.
	length.resize(len(offsets) - 1)
	encodeByteArrayLengths(length.values, offsets)
	dst = dst[:0]
	dst = encodeInt32(dst, length.values)
	dst = append(dst, src...)
	return dst, nil
}

// DecodeByteArray decodes a DELTA_LENGTH_BYTE_ARRAY buffer: it reads the
// packed lengths, reconstructs the offsets array (len(values)+1 entries), and
// copies the concatenated value bytes into dst.
func (e *LengthByteArrayEncoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) {
	dst, offsets = dst[:0], offsets[:0]
	length := getInt32Buffer()
	defer putInt32Buffer(length)
	src, err := length.decode(src)
	if err != nil {
		return dst, offsets, e.wrap(err)
	}
	if size := len(length.values) + 1; cap(offsets) < size {
		offsets = make([]uint32, size, 2*size)
	} else {
		offsets = offsets[:size]
	}
	lastOffset, invalidLength := decodeByteArrayLengths(offsets, length.values)
	if invalidLength != 0 {
		return dst, offsets, e.wrap(errInvalidNegativeValueLength(int(invalidLength)))
	}
	if int(lastOffset) > len(src) {
		return dst, offsets, e.wrap(errValueLengthOutOfBounds(int(lastOffset), len(src)))
	}
	return append(dst, src[:lastOffset]...), offsets, nil
}

// EstimateDecodeByteArraySize returns the sum of the encoded value lengths as
// a decode-size hint. The decode error is deliberately ignored: a failed or
// partial decode simply yields a smaller (best-effort) estimate.
func (e *LengthByteArrayEncoding) EstimateDecodeByteArraySize(src []byte) int {
	length := getInt32Buffer()
	defer putInt32Buffer(length)
	length.decode(src)
	return int(length.sum())
}

// CanDecodeInPlace reports that decoded output may share the input buffer.
func (e *LengthByteArrayEncoding) CanDecodeInPlace() bool {
	return true
}

// wrap tags err with this encoding's identity; nil passes through unchanged.
func (e *LengthByteArrayEncoding) wrap(err error) error {
	if err != nil {
		err = encoding.Error(e, err)
	}
	return err
}

================================================
FILE: encoding/delta/length_byte_array_amd64.go
================================================
//go:build !purego

package delta

//go:noescape
func encodeByteArrayLengths(lengths []int32, offsets []uint32)

//go:noescape
func decodeByteArrayLengths(offsets []uint32, lengths []int32) (lastOffset uint32, invalidLength int32)

================================================
FILE: encoding/delta/length_byte_array_amd64.s
================================================
//go:build
!purego #include "textflag.h" // func encodeByteArrayLengths(lengths []int32, offsets []uint32) TEXT ·encodeByteArrayLengths(SB), NOSPLIT, $0-48 MOVQ lengths_base+0(FP), AX MOVQ lengths_len+8(FP), CX MOVQ offsets_base+24(FP), BX XORQ SI, SI CMPQ CX, $4 JB test MOVQ CX, DX SHRQ $2, DX SHLQ $2, DX loopSSE2: MOVOU 0(BX)(SI*4), X0 MOVOU 4(BX)(SI*4), X1 PSUBL X0, X1 MOVOU X1, (AX)(SI*4) ADDQ $4, SI CMPQ SI, DX JNE loopSSE2 JMP test loop: MOVL 0(BX)(SI*4), R8 MOVL 4(BX)(SI*4), R9 SUBL R8, R9 MOVL R9, (AX)(SI*4) INCQ SI test: CMPQ SI, CX JNE loop RET // func decodeByteArrayLengths(offsets []uint32, length []int32) (lastOffset uint32, invalidLength int32) TEXT ·decodeByteArrayLengths(SB), NOSPLIT, $0-56 MOVQ offsets_base+0(FP), AX MOVQ lengths_base+24(FP), BX MOVQ lengths_len+32(FP), CX XORQ DX, DX // lastOffset XORQ DI, DI // invalidLength XORQ SI, SI CMPQ CX, $4 JL test MOVQ CX, R8 SHRQ $2, R8 SHLQ $2, R8 MOVL $0, (AX) PXOR X0, X0 PXOR X3, X3 // This loop computes the prefix sum of the lengths array in order to // generate values of the offsets array. // // We stick to SSE2 to keep the code simple (the Go compiler appears to // assume that SSE2 must be supported on AMD64) which already yields most // of the performance that we could get on this subroutine if we were using // AVX2. // // The X3 register also accumulates a mask of all length values, which is // checked after the loop to determine whether any of the lengths were // negative. 
//
	// The following article contains a description of the prefix sum algorithm
	// used in this function: https://en.algorithmica.org/hpc/algorithms/prefix/
loopSSE2:
	MOVOU (BX)(SI*4), X1
	POR X1, X3 // fold lengths into the sign-bit accumulator
	// In-register prefix sum of the 4 lanes via two shift+add steps.
	MOVOA X1, X2
	PSLLDQ $4, X2
	PADDD X2, X1
	MOVOA X1, X2
	PSLLDQ $8, X2
	PADDD X2, X1
	PADDD X1, X0 // add the carried running sum from previous iterations
	MOVOU X0, 4(AX)(SI*4)
	PSHUFD $0b11111111, X0, X0 // broadcast the top lane as the next carry
	ADDQ $4, SI
	CMPQ SI, R8
	JNE loopSSE2

	// If any of the most significant bits of double words in the X3 register
	// are set to 1, it indicates that one of the lengths was negative and
	// therefore the prefix sum is invalid.
	//
	// TODO: we report the invalid length as -1, effectively losing the original
	// value due to the aggregation within X3. This is something that we might
	// want to address in the future to provide better error reporting.
	MOVMSKPS X3, R8
	MOVL $-1, R9
	CMPL R8, $0
	CMOVLNE R9, DI
	MOVQ X0, DX // carry the running sum into the scalar tail loop
	JMP test
loop:
	// Scalar tail: offsets[i] = lastOffset; lastOffset += lengths[i];
	// a negative length is captured in DI (invalidLength).
	MOVL (BX)(SI*4), R8
	MOVL DX, (AX)(SI*4)
	ADDL R8, DX
	CMPL R8, $0
	CMOVLLT R8, DI
	INCQ SI
test:
	CMPQ SI, CX
	JNE loop
	MOVL DX, (AX)(SI*4) // final offset marks the end of the last value
	MOVL DX, lastOffset+48(FP)
	MOVL DI, invalidLength+52(FP)
	RET

================================================
FILE: encoding/delta/length_byte_array_purego.go
================================================
//go:build purego || !amd64

package delta

// encodeByteArrayLengths derives the length of each value from consecutive
// offsets: lengths[i] = offsets[i+1] - offsets[i].
func encodeByteArrayLengths(lengths []int32, offsets []uint32) {
	for i := range lengths {
		lengths[i] = int32(offsets[i+1] - offsets[i])
	}
}

// decodeByteArrayLengths rebuilds offsets as the prefix sum of lengths and
// returns the total (last offset). If a negative length is encountered it is
// returned as the second result and the offsets array is left partially
// filled; on success the second result is 0 and offsets[len(lengths)] holds
// the end of the last value.
func decodeByteArrayLengths(offsets []uint32, lengths []int32) (uint32, int32) {
	lastOffset := uint32(0)
	for i, n := range lengths {
		if n < 0 {
			return lastOffset, n
		}
		offsets[i] = lastOffset
		lastOffset += uint32(n)
	}
	offsets[len(lengths)] = lastOffset
	return lastOffset, 0
}

================================================
FILE: encoding/delta/length_byte_array_test.go
================================================
package delta

import "testing"

func TestDecodeByteArrayLengths(t *testing.T) {
	lengths := make([]int32, 999)
	offsets := make([]uint32, len(lengths)+1)
	totalLength := uint32(0)
	for i :=
range lengths {
		lengths[i] = int32(i)
		totalLength += uint32(i)
	}
	lastOffset, invalidLength := decodeByteArrayLengths(offsets, lengths)
	if invalidLength != 0 {
		t.Fatal("wrong invalid length:", invalidLength)
	}
	if lastOffset != totalLength {
		// Fix: want/got were swapped in this message; the expected value is
		// totalLength and the observed value is lastOffset (matching the
		// convention of the other Fatalf calls in this test).
		t.Fatalf("wrong last offset: want=%d got=%d", totalLength, lastOffset)
	}
	// Every offset must equal the running sum of the lengths preceding it.
	expectOffset := uint32(0)
	for i, offset := range offsets[:len(lengths)] {
		if offset != expectOffset {
			t.Fatalf("wrong offset at index %d: want=%d got=%d", i, expectOffset, offset)
		}
		expectOffset += uint32(lengths[i])
	}
	// The trailing offset marks the end of the last value.
	if offsets[len(lengths)] != lastOffset {
		t.Fatalf("wrong last offset: want=%d got=%d", lastOffset, offsets[len(lengths)])
	}
}

================================================
FILE: encoding/delta/testdata/fuzz/FuzzDeltaByteArray/2404234dd7e87c04303eb7e58208d5b2ccb04fb616c18f3254e2375c4bc327e3
================================================
go test fuzz v1
[]byte("\x80\xf8\xa9\xaf\x14\xfc\r\rR1000")
int64(13)

================================================
FILE: encoding/delta/testdata/fuzz/FuzzDeltaByteArray/4cf9c92e5a2096e3d6c42eaf9b1e31d2567854d33e06c8d2d7a8c46437345850
================================================
go test fuzz v1
[]byte("\xa1\xa1\xa1\xa1\xa1\xa1\xa1\xa1\xa100")
int64(-180)

================================================
FILE: encoding/delta/testdata/fuzz/FuzzDeltaByteArray/9b210529f5e34e2dea5824929bf0d8242dc9c3165c0dce10bb376c50e21b38cc
================================================
go test fuzz v1
[]byte("\x800000\xc9\xc9\xc9\xc9\xc9\xc9\xc9\xc9\xc900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000") int64(-79) ================================================ FILE: encoding/delta/testdata/fuzz/FuzzDeltaByteArray/fbe137144bcda3a149c8ea109703f3242192c5480ea1e82dde0ea24e94f3afef ================================================ go test fuzz v1 []byte("\x8000000") int64(-97) ================================================ FILE: encoding/encoding.go ================================================ // Package encoding provides the generic APIs implemented by parquet encodings // in its sub-packages. package encoding import ( "math" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/format" ) const ( MaxFixedLenByteArraySize = math.MaxInt16 ) // The Encoding interface is implemented by types representing parquet column // encodings. // // Encoding instances must be safe to use concurrently from multiple goroutines. type Encoding interface { // Returns a human-readable name for the encoding. String() string // Returns the parquet code representing the encoding. Encoding() format.Encoding // Encode methods serialize the source sequence of values into the // destination buffer, potentially reallocating it if it was too short to // contain the output. 
//
	// The methods panic if the type of src values differ from the type of
	// values being encoded.
	EncodeLevels(dst []byte, src []uint8) ([]byte, error)
	EncodeBoolean(dst []byte, src []byte) ([]byte, error)
	EncodeInt32(dst []byte, src []int32) ([]byte, error)
	EncodeInt64(dst []byte, src []int64) ([]byte, error)
	EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error)
	EncodeFloat(dst []byte, src []float32) ([]byte, error)
	EncodeDouble(dst []byte, src []float64) ([]byte, error)
	// offsets delimits the values held in src; callers appear to pass one more
	// offset than there are values, the last marking the end of the final
	// value — NOTE(review): confirm against implementations.
	EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error)
	EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error)

	// Decode methods deserialize from the source buffer into the destination
	// slice, potentially growing it if it was too short to contain the result.
	//
	// The methods panic if the type of dst values differ from the type of
	// values being decoded.
	DecodeLevels(dst []uint8, src []byte) ([]uint8, error)
	DecodeBoolean(dst []byte, src []byte) ([]byte, error)
	DecodeInt32(dst []int32, src []byte) ([]int32, error)
	DecodeInt64(dst []int64, src []byte) ([]int64, error)
	DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error)
	DecodeFloat(dst []float32, src []byte) ([]float32, error)
	DecodeDouble(dst []float64, src []byte) ([]float64, error)
	// DecodeByteArray also returns the offsets delimiting the decoded values.
	DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error)
	DecodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error)

	// Computes an estimation of the output size of decoding the encoded page
	// of values passed as argument.
	//
	// Note that this is an estimate, it is useful to preallocate the output
	// buffer that will be passed to the decode method, but the actual output
	// size may be different.
	//
	// The estimate never errors since it is not intended to be used as an
	// input validation method.
	EstimateDecodeByteArraySize(src []byte) int

	// When this method returns true, the encoding supports receiving the same
CanDecodeInPlace() bool } ================================================ FILE: encoding/encoding_test.go ================================================ package encoding_test import ( "bytes" "io" "math" "math/bits" "math/rand" "testing" "time" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/encoding/bitpacked" "github.com/segmentio/parquet-go/encoding/bytestreamsplit" "github.com/segmentio/parquet-go/encoding/delta" "github.com/segmentio/parquet-go/encoding/plain" "github.com/segmentio/parquet-go/encoding/rle" "github.com/segmentio/parquet-go/internal/unsafecast" ) func repeatInt64(seq []int64, n int) []int64 { rep := make([]int64, len(seq)*n) for i := 0; i < n; i++ { copy(rep[i*len(seq):], seq) } return rep } var booleanTests = [...][]bool{ {}, {true}, {false}, {true, false, true, false, true, true, true, false, false, true}, { // repeating 32x true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, }, { // repeating 33x true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, }, { // alternating 15x false, true, false, true, false, true, false, true, false, true, false, true, false, true, false, }, { // alternating 16x false, true, false, true, false, true, false, true, false, true, false, true, false, true, false, true, }, } var levelsTests = [...][]byte{ {}, {0}, {1}, {0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt8, math.MaxInt8, 0}, { // repeating 24x 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, }, { // never repeating 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 
0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, }, { // streaks of repeating values 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, }, } var int32Tests = [...][]int32{ {}, {0}, {1}, {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt32, math.MaxInt32, 0}, { // repeating 24x 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, }, { // never repeating 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, }, { // streaks of repeating values 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, }, { // a sequence that triggered a bug in the delta binary packed encoding 24, 36, 47, 32, 29, 4, 9, 20, 2, 18, }, } var int64Tests = [...][]int64{ {}, {0}, {1}, {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxInt64, math.MaxInt64, 0}, { // repeating 24x 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, }, { // never repeating 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, }, { // streaks of repeating values 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, }, { // streaks of repeating values 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, }, repeatInt64( // a sequence resulting in 64 bits words in the delta binary packed encoding []int64{ math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, 0, math.MaxInt64, math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, math.MinInt64, math.MaxInt64, }, 5, ), } var int96Tests = [...][]deprecated.Int96{ {}, 
{{0: 0}}, {{0: 1}}, } var floatTests = [...][]float32{ {}, {0}, {1}, {0, 1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0}, {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat32, math.MaxFloat32, 0}, } var doubleTests = [...][]float64{ {}, {0}, {1}, {-1, 0, 1, 0, 2, 3, 4, 5, 6, math.MaxFloat64, math.MaxFloat64, 0}, } var byteArrayTests = [...][][]byte{ {}, {[]byte("")}, {[]byte("A"), []byte("B"), []byte("C")}, {[]byte("hello world!"), bytes.Repeat([]byte("1234567890"), 100)}, } var fixedLenByteArrayTests = [...]struct { size int data []byte }{ {size: 1, data: []byte("")}, {size: 1, data: []byte("ABCDEFGH")}, {size: 2, data: []byte("ABCDEFGH")}, {size: 4, data: []byte("ABCDEFGH")}, {size: 8, data: []byte("ABCDEFGH")}, {size: 10, data: bytes.Repeat([]byte("123456789"), 100)}, {size: 16, data: bytes.Repeat([]byte("1234567890"), 160)}, } var encodings = [...]encoding.Encoding{ new(plain.Encoding), new(rle.Encoding), new(bitpacked.Encoding), new(plain.DictionaryEncoding), new(rle.DictionaryEncoding), new(delta.BinaryPackedEncoding), new(delta.LengthByteArrayEncoding), new(delta.ByteArrayEncoding), new(bytestreamsplit.Encoding), } func TestEncoding(t *testing.T) { for _, encoding := range encodings { t.Run(encoding.String(), func(t *testing.T) { testEncoding(t, encoding) }) } } func testEncoding(t *testing.T, e encoding.Encoding) { for _, test := range [...]struct { scenario string function func(*testing.T, encoding.Encoding) }{ { scenario: "boolean", function: testBooleanEncoding, }, { scenario: "levels", function: testLevelsEncoding, }, { scenario: "int32", function: testInt32Encoding, }, { scenario: "int64", function: testInt64Encoding, }, { scenario: "int96", function: testInt96Encoding, }, { scenario: "float", function: testFloatEncoding, }, { scenario: "double", function: testDoubleEncoding, }, { scenario: "byte array", function: testByteArrayEncoding, }, { scenario: "fixed length byte array", function: testFixedLenByteArrayEncoding, }, } { 
t.Run(test.scenario, func(t *testing.T) { test.function(t, e) }) } } func setBitWidth(enc encoding.Encoding, bitWidth int) { switch e := enc.(type) { case *rle.Encoding: e.BitWidth = bitWidth case *bitpacked.Encoding: e.BitWidth = bitWidth } } type encodingFunc func(encoding.Encoding, []byte, []byte) ([]byte, error) func testBooleanEncoding(t *testing.T, e encoding.Encoding) { testCanEncodeBoolean(t, e) buffer := []byte{} values := []byte{} input := []byte{} setBitWidth(e, 1) for _, test := range booleanTests { t.Run("", func(t *testing.T) { var err error input = input[:0] count := 0 for _, value := range test { input = plain.AppendBoolean(input, count, value) count++ } buffer, err = e.EncodeBoolean(buffer, input) assertNoError(t, err) values, err = e.DecodeBoolean(values, buffer) assertNoError(t, err) assertEqualBytes(t, input, values) }) } } func testLevelsEncoding(t *testing.T, e encoding.Encoding) { testCanEncodeLevels(t, e) buffer := []byte{} values := []byte{} for _, input := range levelsTests { setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(input))) t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeLevels(buffer, input) assertNoError(t, err) values, err = e.DecodeLevels(values, buffer) assertNoError(t, err) assertEqualBytes(t, input, values[:len(input)]) }) } } func testInt32Encoding(t *testing.T, e encoding.Encoding) { testCanEncodeInt32(t, e) buffer := []byte{} values := []int32{} for _, input := range int32Tests { setBitWidth(e, maxLenInt32(input)) t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeInt32(buffer, input) assertNoError(t, err) values, err = e.DecodeInt32(values, buffer) assertNoError(t, err) assertEqualInt32(t, input, values) }) } } func testInt64Encoding(t *testing.T, e encoding.Encoding) { testCanEncodeInt64(t, e) buffer := []byte{} values := []int64{} for _, input := range int64Tests { setBitWidth(e, maxLenInt64(input)) t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeInt64(buffer, 
input) assertNoError(t, err) values, err = e.DecodeInt64(values, buffer) assertNoError(t, err) assertEqualInt64(t, input, values) }) } } func testInt96Encoding(t *testing.T, e encoding.Encoding) { testCanEncodeInt96(t, e) buffer := []byte{} values := []deprecated.Int96{} for _, input := range int96Tests { t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeInt96(buffer, input) assertNoError(t, err) values, err = e.DecodeInt96(values, buffer) assertNoError(t, err) assertEqualInt96(t, input, values) }) } } func testFloatEncoding(t *testing.T, e encoding.Encoding) { testCanEncodeFloat(t, e) buffer := []byte{} values := []float32{} for _, input := range floatTests { t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeFloat(buffer, input) assertNoError(t, err) values, err = e.DecodeFloat(values, buffer) assertNoError(t, err) assertEqualFloat32(t, input, values) }) } } func testDoubleEncoding(t *testing.T, e encoding.Encoding) { testCanEncodeDouble(t, e) buffer := []byte{} values := []float64{} for _, input := range doubleTests { t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeDouble(buffer, input) assertNoError(t, err) values, err = e.DecodeDouble(values, buffer) assertNoError(t, err) assertEqualFloat64(t, input, values) }) } } func testByteArrayEncoding(t *testing.T, e encoding.Encoding) { testCanEncodeByteArray(t, e) input := []byte{} buffer := []byte{} values := []byte{} offsets := []uint32{} for _, test := range byteArrayTests { offsets, input = offsets[:0], input[:0] lastOffset := uint32(0) for _, value := range test { offsets = append(offsets, lastOffset) input = append(input, value...) 
lastOffset += uint32(len(value)) } offsets = append(offsets, lastOffset) t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeByteArray(buffer, input, offsets) assertNoError(t, err) estimatedOutputSize := e.EstimateDecodeByteArraySize(buffer) values, _, err = e.DecodeByteArray(values, buffer, offsets) assertNoError(t, err) assertEqualBytes(t, input, values) if len(values) > estimatedOutputSize { t.Errorf("the decode output was larger than the estimate: %d>%d", len(values), estimatedOutputSize) } }) } } func testFixedLenByteArrayEncoding(t *testing.T, e encoding.Encoding) { testCanEncodeFixedLenByteArray(t, e) buffer := []byte{} values := []byte{} for _, test := range fixedLenByteArrayTests { t.Run("", func(t *testing.T) { var err error buffer, err = e.EncodeFixedLenByteArray(buffer, test.data, test.size) assertNoError(t, err) values, err = e.DecodeFixedLenByteArray(values, buffer, test.size) assertNoError(t, err) assertEqualBytes(t, test.data, values) }) } } func testCanEncodeBoolean(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeBoolean) } func testCanEncodeLevels(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeLevels) } func testCanEncodeInt32(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeInt32) } func testCanEncodeInt64(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeInt64) } func testCanEncodeInt96(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeInt96) } func testCanEncodeFloat(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeFloat) } func testCanEncodeDouble(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeDouble) } func testCanEncodeByteArray(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeByteArray) } func testCanEncodeFixedLenByteArray(t testing.TB, e encoding.Encoding) { testCanEncode(t, e, encoding.CanEncodeFixedLenByteArray) } 
func testCanEncode(t testing.TB, e encoding.Encoding, test func(encoding.Encoding) bool) { if !test(e) { t.Skip("encoding not supported") } } func assertNoError(t *testing.T, err error) { t.Helper() if err != nil { t.Fatal(err) } } func assertEqualBytes(t *testing.T, want, got []byte) { t.Helper() if !bytes.Equal(want, got) { t.Fatalf("values mismatch:\nwant = %q\ngot = %q", want, got) } } func assertEqualInt32(t *testing.T, want, got []int32) { t.Helper() assertEqualBytes(t, unsafecast.Int32ToBytes(want), unsafecast.Int32ToBytes(got)) } func assertEqualInt64(t *testing.T, want, got []int64) { t.Helper() assertEqualBytes(t, unsafecast.Int64ToBytes(want), unsafecast.Int64ToBytes(got)) } func assertEqualInt96(t *testing.T, want, got []deprecated.Int96) { t.Helper() assertEqualBytes(t, deprecated.Int96ToBytes(want), deprecated.Int96ToBytes(got)) } func assertEqualFloat32(t *testing.T, want, got []float32) { t.Helper() assertEqualBytes(t, unsafecast.Float32ToBytes(want), unsafecast.Float32ToBytes(got)) } func assertEqualFloat64(t *testing.T, want, got []float64) { t.Helper() assertEqualBytes(t, unsafecast.Float64ToBytes(want), unsafecast.Float64ToBytes(got)) } const ( benchmarkNumValues = 10e3 ) func newRand() *rand.Rand { return rand.New(rand.NewSource(1)) } func BenchmarkEncode(b *testing.B) { for _, encoding := range encodings { b.Run(encoding.String(), func(b *testing.B) { benchmarkEncode(b, encoding) }) } } func benchmarkEncode(b *testing.B, e encoding.Encoding) { for _, test := range [...]struct { scenario string function func(*testing.B, encoding.Encoding) }{ { scenario: "boolean", function: benchmarkEncodeBoolean, }, { scenario: "levels", function: benchmarkEncodeLevels, }, { scenario: "int32", function: benchmarkEncodeInt32, }, { scenario: "int64", function: benchmarkEncodeInt64, }, { scenario: "float", function: benchmarkEncodeFloat, }, { scenario: "double", function: benchmarkEncodeDouble, }, { scenario: "byte array", function: benchmarkEncodeByteArray, }, { 
scenario: "fixed length byte array", function: benchmarkEncodeFixedLenByteArray, }, } { b.Run(test.scenario, func(b *testing.B) { test.function(b, e) }) } } func benchmarkEncodeBoolean(b *testing.B, e encoding.Encoding) { testCanEncodeBoolean(b, e) buffer := make([]byte, 0) values := generateBooleanValues(benchmarkNumValues, newRand()) setBitWidth(e, 1) reportThroughput(b, benchmarkNumValues, len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeBoolean(buffer, values) }) }) } func benchmarkEncodeLevels(b *testing.B, e encoding.Encoding) { testCanEncodeLevels(b, e) buffer := make([]byte, 0) values := generateLevelValues(benchmarkNumValues, newRand()) setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values))) reportThroughput(b, benchmarkNumValues, len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeLevels(buffer, values) }) }) } func benchmarkEncodeInt32(b *testing.B, e encoding.Encoding) { testCanEncodeInt32(b, e) buffer := make([]byte, 0) values := generateInt32Values(benchmarkNumValues, newRand()) setBitWidth(e, maxLenInt32(values)) reportThroughput(b, benchmarkNumValues, 4*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeInt32(buffer, values) }) }) } func benchmarkEncodeInt64(b *testing.B, e encoding.Encoding) { testCanEncodeInt64(b, e) buffer := make([]byte, 0) values := generateInt64Values(benchmarkNumValues, newRand()) setBitWidth(e, maxLenInt64(values)) reportThroughput(b, benchmarkNumValues, 8*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeInt64(buffer, values) }) }) } func benchmarkEncodeFloat(b *testing.B, e encoding.Encoding) { testCanEncodeFloat(b, e) buffer := make([]byte, 0) values := generateFloatValues(benchmarkNumValues, newRand()) reportThroughput(b, benchmarkNumValues, 4*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeFloat(buffer, values) }) }) } func benchmarkEncodeDouble(b *testing.B, e 
encoding.Encoding) { testCanEncodeDouble(b, e) buffer := make([]byte, 0) values := generateDoubleValues(benchmarkNumValues, newRand()) reportThroughput(b, benchmarkNumValues, 8*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeDouble(buffer, values) }) }) } func benchmarkEncodeByteArray(b *testing.B, e encoding.Encoding) { testCanEncodeByteArray(b, e) buffer := make([]byte, 0) values, offsets := generateByteArrayValues(benchmarkNumValues, newRand()) numBytes := len(values) + 4*len(offsets) reportThroughput(b, benchmarkNumValues, numBytes, func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeByteArray(buffer, values, offsets) }) }) } func benchmarkEncodeFixedLenByteArray(b *testing.B, e encoding.Encoding) { testCanEncodeFixedLenByteArray(b, e) const size = 16 buffer := make([]byte, 0) values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size) reportThroughput(b, benchmarkNumValues, len(values), func() { benchmarkZeroAllocsPerRun(b, func() { buffer, _ = e.EncodeFixedLenByteArray(buffer, values, size) }) }) } func BenchmarkDecode(b *testing.B) { for _, encoding := range encodings { b.Run(encoding.String(), func(b *testing.B) { benchmarkDecode(b, encoding) }) } } func benchmarkDecode(b *testing.B, e encoding.Encoding) { for _, test := range [...]struct { scenario string function func(*testing.B, encoding.Encoding) }{ { scenario: "boolean", function: benchmarkDecodeBoolean, }, { scenario: "levels", function: benchmarkDecodeLevels, }, { scenario: "int32", function: benchmarkDecodeInt32, }, { scenario: "int64", function: benchmarkDecodeInt64, }, { scenario: "float", function: benchmarkDecodeFloat, }, { scenario: "double", function: benchmarkDecodeDouble, }, { scenario: "byte array", function: benchmarkDecodeByteArray, }, { scenario: "fixed length byte array", function: benchmarkDecodeFixedLenByteArray, }, } { b.Run(test.scenario, func(b *testing.B) { test.function(b, e) }) } } func benchmarkDecodeBoolean(b 
*testing.B, e encoding.Encoding) { testCanEncodeBoolean(b, e) values := generateBooleanValues(benchmarkNumValues, newRand()) setBitWidth(e, 1) buffer, _ := e.EncodeBoolean(nil, values) reportThroughput(b, benchmarkNumValues, len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeBoolean(values, buffer) }) }) } func benchmarkDecodeLevels(b *testing.B, e encoding.Encoding) { testCanEncodeLevels(b, e) values := generateLevelValues(benchmarkNumValues, newRand()) setBitWidth(e, maxLenInt8(unsafecast.BytesToInt8(values))) buffer, _ := e.EncodeLevels(nil, values) reportThroughput(b, benchmarkNumValues, len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeLevels(values, buffer) }) }) } func benchmarkDecodeInt32(b *testing.B, e encoding.Encoding) { testCanEncodeInt32(b, e) values := generateInt32Values(benchmarkNumValues, newRand()) setBitWidth(e, maxLenInt32(values)) buffer, _ := e.EncodeInt32(nil, values) reportThroughput(b, benchmarkNumValues, 4*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeInt32(values, buffer) }) }) } func benchmarkDecodeInt64(b *testing.B, e encoding.Encoding) { testCanEncodeInt64(b, e) values := generateInt64Values(benchmarkNumValues, newRand()) setBitWidth(e, maxLenInt64(values)) buffer, _ := e.EncodeInt64(nil, values) reportThroughput(b, benchmarkNumValues, 8*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeInt64(values, buffer) }) }) } func benchmarkDecodeFloat(b *testing.B, e encoding.Encoding) { testCanEncodeFloat(b, e) values := generateFloatValues(benchmarkNumValues, newRand()) buffer, _ := e.EncodeFloat(nil, values) reportThroughput(b, benchmarkNumValues, 4*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeFloat(values, buffer) }) }) } func benchmarkDecodeDouble(b *testing.B, e encoding.Encoding) { testCanEncodeDouble(b, e) values := generateDoubleValues(benchmarkNumValues, newRand()) buffer, _ := 
e.EncodeDouble(nil, values) reportThroughput(b, benchmarkNumValues, 8*len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeDouble(values, buffer) }) }) } func benchmarkDecodeByteArray(b *testing.B, e encoding.Encoding) { testCanEncodeByteArray(b, e) values, offsets := generateByteArrayValues(benchmarkNumValues, newRand()) buffer, _ := e.EncodeByteArray(nil, values, offsets) numBytes := len(values) + 4*len(offsets) reportThroughput(b, benchmarkNumValues, numBytes, func() { benchmarkZeroAllocsPerRun(b, func() { values, offsets, _ = e.DecodeByteArray(values, buffer, offsets) }) }) } func benchmarkDecodeFixedLenByteArray(b *testing.B, e encoding.Encoding) { testCanEncodeFixedLenByteArray(b, e) const size = 16 values := generateFixedLenByteArrayValues(benchmarkNumValues, newRand(), size) buffer, _ := e.EncodeFixedLenByteArray(nil, values, size) reportThroughput(b, benchmarkNumValues, len(values), func() { benchmarkZeroAllocsPerRun(b, func() { values, _ = e.DecodeFixedLenByteArray(values, buffer, size) }) }) } func benchmarkZeroAllocsPerRun(b *testing.B, f func()) { if allocs := testing.AllocsPerRun(b.N, f); allocs != 0 && !testing.Short() { b.Errorf("too many memory allocations: %g", allocs) } } func reportThroughput(b *testing.B, numValues, numBytes int, do func()) { start := time.Now() do() seconds := time.Since(start).Seconds() b.SetBytes(int64(numBytes)) b.ReportMetric(float64(b.N*numValues)/seconds, "value/s") } func generateLevelValues(n int, r *rand.Rand) []uint8 { values := make([]uint8, n) for i := range values { values[i] = uint8(r.Intn(6)) } return values } func generateBooleanValues(n int, r *rand.Rand) []byte { values := make([]byte, n/8+1) io.ReadFull(r, values) return values } func generateInt32Values(n int, r *rand.Rand) []int32 { values := make([]int32, n) for i := range values { values[i] = r.Int31n(100) } return values } func generateInt64Values(n int, r *rand.Rand) []int64 { values := make([]int64, n) for i := range values 
{ values[i] = r.Int63n(100) } return values } func generateFloatValues(n int, r *rand.Rand) []float32 { values := make([]float32, n) for i := range values { values[i] = r.Float32() } return values } func generateDoubleValues(n int, r *rand.Rand) []float64 { values := make([]float64, n) for i := range values { values[i] = r.Float64() } return values } func generateByteArrayValues(n int, r *rand.Rand) ([]byte, []uint32) { const maxLen = 21 offsets := make([]uint32, n+1) values := make([]byte, n*maxLen) length := 0 for i := 0; i < n; i++ { k := r.Intn(maxLen) + 1 io.ReadFull(r, values[length:length+k]) offsets[i] = uint32(length) length += k } offsets[n] = uint32(length) return values[:length], offsets } func generateFixedLenByteArrayValues(n int, r *rand.Rand, size int) []byte { values := make([]byte, n*size) io.ReadFull(r, values) return values } func maxLenInt8(data []int8) int { max := 0 for _, v := range data { if n := bits.Len8(uint8(v)); n > max { max = n } } return max } func maxLenInt32(data []int32) int { max := 0 for _, v := range data { if n := bits.Len32(uint32(v)); n > max { max = n } } return max } func maxLenInt64(data []int64) int { max := 0 for _, v := range data { if n := bits.Len64(uint64(v)); n > max { max = n } } return max } ================================================ FILE: encoding/fuzz/fuzz.go ================================================ //go:build go1.18 // +build go1.18 // Package fuzz contains functions to help fuzz test parquet encodings. 
package fuzz

import (
	"math/rand"
	"testing"
	"unsafe"

	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/internal/unsafecast"
)

// EncodeBoolean fuzzes the BOOLEAN encode/decode round trip of e.
func EncodeBoolean(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		encoding.Encoding.EncodeBoolean,
		encoding.Encoding.DecodeBoolean,
		generate[byte],
	)
}

// EncodeLevels fuzzes the repetition/definition level round trip of e.
func EncodeLevels(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		encoding.Encoding.EncodeLevels,
		encoding.Encoding.DecodeLevels,
		generate[byte],
	)
}

func EncodeInt32(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		encoding.Encoding.EncodeInt32,
		encoding.Encoding.DecodeInt32,
		generate[int32],
	)
}

func EncodeInt64(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		encoding.Encoding.EncodeInt64,
		encoding.Encoding.DecodeInt64,
		generate[int64],
	)
}

func EncodeFloat(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		encoding.Encoding.EncodeFloat,
		encoding.Encoding.DecodeFloat,
		generate[float32],
	)
}

func EncodeDouble(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		encoding.Encoding.EncodeDouble,
		encoding.Encoding.DecodeDouble,
		generate[float64],
	)
}

// EncodeByteArray fuzzes the BYTE_ARRAY round trip of e; the adapters below
// translate between []string test values and the (values, offsets)
// representation used by the encoding interfaces.
func EncodeByteArray(f *testing.F, e encoding.Encoding) {
	encode(f, e,
		func(enc encoding.Encoding, dst []byte, src []string) ([]byte, error) {
			size := 0
			for _, s := range src {
				size += len(s)
			}
			offsets := make([]uint32, 0, len(src)+1)
			values := make([]byte, 0, size)
			for _, s := range src {
				offsets = append(offsets, uint32(len(values)))
				values = append(values, s...)
			}
			offsets = append(offsets, uint32(len(values)))
			return enc.EncodeByteArray(dst, values, offsets)
		},
		func(enc encoding.Encoding, dst []string, src []byte) ([]string, error) {
			dst = dst[:0]
			values, offsets, err := enc.DecodeByteArray(nil, src, nil)
			if err != nil {
				return dst, err
			}
			if len(offsets) > 0 {
				baseOffset := offsets[0]
				for _, endOffset := range offsets[1:] {
					dst = append(dst, unsafecast.BytesToString(values[baseOffset:endOffset]))
					baseOffset = endOffset
				}
			}
			return dst, nil
		},
		// Split the fuzz input into pseudo-random chunks of at most
		// len(src)/10+1 bytes each.
		func(dst []string, src []byte, prng *rand.Rand) []string {
			limit := len(src)/10 + 1
			for i := 0; i < len(src); {
				n := prng.Intn(limit) + 1
				r := len(src) - i
				if n > r {
					n = r
				}
				dst = append(dst, unsafecast.BytesToString(src[i:i+n]))
				i += n
			}
			return dst
		},
	)
}

type encodingFunc[T comparable] func(encoding.Encoding, []byte, []T) ([]byte, error)

type decodingFunc[T comparable] func(encoding.Encoding, []T, []byte) ([]T, error)

type generateFunc[T comparable] func(dst []T, src []byte, prng *rand.Rand) []T

// encode is the generic fuzz harness: it derives a value slice from the fuzz
// input, encodes then decodes it with e, and fails if the round trip does not
// reproduce the input.
func encode[T comparable](f *testing.F, e encoding.Encoding, encode encodingFunc[T], decode decodingFunc[T], generate generateFunc[T]) {
	const bufferSize = 64 * 1024
	var zero T
	var err error
	var buf = make([]T, bufferSize/unsafe.Sizeof(zero))
	var src = make([]T, bufferSize/unsafe.Sizeof(zero))
	var dst = make([]byte, bufferSize)
	var prng = rand.New(rand.NewSource(0))

	f.Fuzz(func(t *testing.T, input []byte, seed int64) {
		prng.Seed(seed)
		src = generate(src[:0], input, prng)

		dst, err = encode(e, dst, src)
		if err != nil {
			t.Error(err)
			return
		}

		buf, err = decode(e, buf, dst)
		if err != nil {
			t.Error(err)
			return
		}

		if !equal(buf, src) {
			t.Error("decoded output does not match the original input")
			return
		}
	})
}

func equal[T comparable](a, b []T) bool {
	if len(a) != len(b) {
		return false
	}
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

// generate reinterprets the raw fuzz input as a slice of T.
func generate[T comparable](dst []T, src []byte, prng *rand.Rand) []T {
	return append(dst[:0], unsafecast.Slice[T](src)...)
}

================================================
FILE: encoding/notsupported.go
================================================
package encoding

import (
	"errors"
	"fmt"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/format"
)

var (
	// ErrNotSupported is an error returned when the underlying encoding does
	// not support the type of values being encoded or decoded.
	//
	// This error may be wrapped with type information, applications must use
	// errors.Is rather than equality comparisons to test the error values
	// returned by encoders and decoders.
	ErrNotSupported = errors.New("encoding not supported")

	// ErrInvalidArgument is an error returned one or more arguments passed to
	// the encoding functions are incorrect.
	//
	// As with ErrNotSupported, this error may be wrapped with specific
	// information about the problem and applications are expected to use
	// errors.Is for comparisons.
	ErrInvalidArgument = errors.New("invalid argument")
)

// Error constructs an error which wraps err and indicates that it originated
// from the given encoding.
func Error(e Encoding, err error) error {
	return fmt.Errorf("%s: %w", e, err)
}

// Errorf is like Error but constructs the error message from the given format
// and arguments.
func Errorf(e Encoding, msg string, args ...interface{}) error {
	return Error(e, fmt.Errorf(msg, args...))
}

// ErrEncodeInvalidInputSize constructs an error indicating that encoding failed
// due to the size of the input.
func ErrEncodeInvalidInputSize(e Encoding, typ string, size int) error {
	return errInvalidInputSize(e, "encode", typ, size)
}

// ErrDecodeInvalidInputSize constructs an error indicating that decoding failed
// due to the size of the input.
func ErrDecodeInvalidInputSize(e Encoding, typ string, size int) error {
	return errInvalidInputSize(e, "decode", typ, size)
}

func errInvalidInputSize(e Encoding, op, typ string, size int) error {
	return Errorf(e, "cannot %s %s from input of size %d: %w", op, typ, size, ErrInvalidArgument)
}

// CanEncodeLevels reports whether e can encode LEVELS values.
func CanEncodeLevels(e Encoding) bool {
	_, err := e.EncodeLevels(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeBoolean reports whether e can encode BOOLEAN values.
func CanEncodeBoolean(e Encoding) bool {
	_, err := e.EncodeBoolean(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeInt32 reports whether e can encode INT32 values.
func CanEncodeInt32(e Encoding) bool {
	_, err := e.EncodeInt32(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeInt64 reports whether e can encode INT64 values.
func CanEncodeInt64(e Encoding) bool {
	_, err := e.EncodeInt64(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeInt96 reports whether e can encode INT96 values.
func CanEncodeInt96(e Encoding) bool {
	_, err := e.EncodeInt96(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeFloat reports whether e can encode FLOAT values.
func CanEncodeFloat(e Encoding) bool {
	_, err := e.EncodeFloat(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeDouble reports whether e can encode DOUBLE values.
func CanEncodeDouble(e Encoding) bool {
	_, err := e.EncodeDouble(nil, nil)
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeByteArray reports whether e can encode BYTE_ARRAY values.
func CanEncodeByteArray(e Encoding) bool {
	_, err := e.EncodeByteArray(nil, nil, zeroOffsets[:])
	return !errors.Is(err, ErrNotSupported)
}

// CanEncodeFixedLenByteArray reports whether e can encode
// FIXED_LEN_BYTE_ARRAY values.
func CanEncodeFixedLenByteArray(e Encoding) bool {
	_, err := e.EncodeFixedLenByteArray(nil, nil, 1)
	return !errors.Is(err, ErrNotSupported)
}

var zeroOffsets [1]uint32

// NotSupported is a type satisfying the Encoding interface which does not
// support encoding nor decoding any value types.
type NotSupported struct {
}

func (NotSupported) String() string {
	return "NOT_SUPPORTED"
}

func (NotSupported) Encoding() format.Encoding {
	return -1
}

func (NotSupported) EncodeLevels(dst []byte, src []uint8) ([]byte, error) {
	return dst[:0], errNotSupported("LEVELS")
}

func (NotSupported) EncodeBoolean(dst []byte, src []byte) ([]byte, error) {
	return dst[:0], errNotSupported("BOOLEAN")
}

func (NotSupported) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
	return dst[:0], errNotSupported("INT32")
}

func (NotSupported) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
	return dst[:0], errNotSupported("INT64")
}

func (NotSupported) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) {
	return dst[:0], errNotSupported("INT96")
}

func (NotSupported) EncodeFloat(dst []byte, src []float32) ([]byte, error) {
	return dst[:0], errNotSupported("FLOAT")
}

func (NotSupported) EncodeDouble(dst []byte, src []float64) ([]byte, error) {
	return dst[:0], errNotSupported("DOUBLE")
}

func (NotSupported) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) {
	return dst[:0], errNotSupported("BYTE_ARRAY")
}

func (NotSupported) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
	return dst[:0], errNotSupported("FIXED_LEN_BYTE_ARRAY")
}

func (NotSupported) DecodeLevels(dst []uint8, src []byte) ([]uint8, error) {
	return dst, errNotSupported("LEVELS")
}

func (NotSupported) DecodeBoolean(dst []byte, src []byte) ([]byte, error) {
	return dst, errNotSupported("BOOLEAN")
}

func (NotSupported) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
	return dst, errNotSupported("INT32")
}

func (NotSupported) DecodeInt64(dst []int64, src []byte) ([]int64, error) {
	return dst, errNotSupported("INT64")
}

func (NotSupported) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) {
	return dst, errNotSupported("INT96")
}

func (NotSupported) DecodeFloat(dst []float32, src []byte) ([]float32, error) {
	return dst, errNotSupported("FLOAT")
}

func (NotSupported) DecodeDouble(dst []float64, src []byte) ([]float64, error) {
	return dst, errNotSupported("DOUBLE")
}

func (NotSupported) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) {
	return dst, offsets, errNotSupported("BYTE_ARRAY")
}

func (NotSupported) DecodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
	return dst, errNotSupported("FIXED_LEN_BYTE_ARRAY")
}

func (NotSupported) EstimateDecodeByteArraySize(src []byte) int {
	return 0
}

func (NotSupported) CanDecodeInPlace() bool {
	return false
}

func errNotSupported(typ string) error {
	return fmt.Errorf("%w for type %s", ErrNotSupported, typ)
}

var (
	_ Encoding = NotSupported{}
)

================================================
FILE: encoding/plain/dictionary.go
================================================
package plain

import (
	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
)

// DictionaryEncoding implements the deprecated PLAIN_DICTIONARY encoding; it
// only supports INT32 dictionary indexes, delegating to the PLAIN encoding.
type DictionaryEncoding struct {
	encoding.NotSupported
	plain Encoding
}

func (e *DictionaryEncoding) String() string {
	return "PLAIN_DICTIONARY"
}

func (e *DictionaryEncoding) Encoding() format.Encoding {
	return format.PlainDictionary
}

func (e *DictionaryEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
	return e.plain.EncodeInt32(dst, src)
}

func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
	return e.plain.DecodeInt32(dst, src)
}

================================================
FILE: encoding/plain/plain.go
================================================
// Package plain implements the PLAIN parquet encoding.
// // https://github.com/apache/parquet-format/blob/master/Encodings.md#plain-plain--0 package plain import ( "encoding/binary" "fmt" "io" "math" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/format" "github.com/segmentio/parquet-go/internal/unsafecast" ) const ( ByteArrayLengthSize = 4 MaxByteArrayLength = math.MaxInt32 ) type Encoding struct { encoding.NotSupported } func (e *Encoding) String() string { return "PLAIN" } func (e *Encoding) Encoding() format.Encoding { return format.Plain } func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) { return append(dst[:0], src...), nil } func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) { return append(dst[:0], unsafecast.Int32ToBytes(src)...), nil } func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) { return append(dst[:0], unsafecast.Int64ToBytes(src)...), nil } func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) { return append(dst[:0], deprecated.Int96ToBytes(src)...), nil } func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) { return append(dst[:0], unsafecast.Float32ToBytes(src)...), nil } func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) { return append(dst[:0], unsafecast.Float64ToBytes(src)...), nil } func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) { dst = dst[:0] if len(offsets) > 0 { baseOffset := offsets[0] for _, endOffset := range offsets[1:] { dst = AppendByteArray(dst, src[baseOffset:endOffset:endOffset]) baseOffset = endOffset } } return dst, nil } func (e *Encoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) { if size < 0 || size > encoding.MaxFixedLenByteArraySize { return dst[:0], encoding.Error(e, encoding.ErrInvalidArgument) } return append(dst[:0], src...), nil } func (e *Encoding) DecodeBoolean(dst []byte, src 
[]byte) ([]byte, error) {
	return append(dst[:0], src...), nil
}

// Fixed-size decoders validate that the input is a whole number of values
// before reinterpreting the bytes.
func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
	if (len(src) % 4) != 0 {
		return dst, encoding.ErrDecodeInvalidInputSize(e, "INT32", len(src))
	}
	return append(dst[:0], unsafecast.BytesToInt32(src)...), nil
}

func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, error) {
	if (len(src) % 8) != 0 {
		return dst, encoding.ErrDecodeInvalidInputSize(e, "INT64", len(src))
	}
	return append(dst[:0], unsafecast.BytesToInt64(src)...), nil
}

func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]deprecated.Int96, error) {
	if (len(src) % 12) != 0 {
		return dst, encoding.ErrDecodeInvalidInputSize(e, "INT96", len(src))
	}
	return append(dst[:0], deprecated.BytesToInt96(src)...), nil
}

func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, error) {
	if (len(src) % 4) != 0 {
		return dst, encoding.ErrDecodeInvalidInputSize(e, "FLOAT", len(src))
	}
	return append(dst[:0], unsafecast.BytesToFloat32(src)...), nil
}

func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64, error) {
	if (len(src) % 8) != 0 {
		return dst, encoding.ErrDecodeInvalidInputSize(e, "DOUBLE", len(src))
	}
	return append(dst[:0], unsafecast.BytesToFloat64(src)...), nil
}

// DecodeByteArray splits the length-prefixed values of src into dst and
// returns the offsets delimiting each value (n+1 offsets for n values).
func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, []uint32, error) {
	dst, offsets = dst[:0], offsets[:0]

	for i := 0; i < len(src); {
		if (len(src) - i) < ByteArrayLengthSize {
			return dst, offsets, ErrTooShort(len(src))
		}
		n := ByteArrayLength(src[i:])
		// Bug fix: the remaining-bytes check must be relative to the current
		// read position i, not the start of src; otherwise a malformed length
		// prefix in any value after the first could pass the check and make
		// the append below slice past the end of src, panicking on corrupt or
		// adversarial input instead of returning an error.
		if n > (len(src) - (i + ByteArrayLengthSize)) {
			return dst, offsets, ErrTooShort(len(src))
		}
		i += ByteArrayLengthSize
		offsets = append(offsets, uint32(len(dst)))
		dst = append(dst, src[i:i+n]...)
		i += n
	}

	return dst, append(offsets, uint32(len(dst))), nil
}

func (e *Encoding) DecodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
	if size < 0 || size > encoding.MaxFixedLenByteArraySize {
		return dst, encoding.Error(e, encoding.ErrInvalidArgument)
	}
	if (len(src) % size) != 0 {
		return dst, encoding.ErrDecodeInvalidInputSize(e, "FIXED_LEN_BYTE_ARRAY", len(src))
	}
	return append(dst[:0], src...), nil
}

func (e *Encoding) EstimateDecodeByteArraySize(src []byte) int {
	return len(src)
}

func (e *Encoding) CanDecodeInPlace() bool {
	return true
}

// The helpers below construct the PLAIN representation of single values.
func Boolean(v bool) []byte { return AppendBoolean(nil, 0, v) }

func Int32(v int32) []byte { return AppendInt32(nil, v) }

func Int64(v int64) []byte { return AppendInt64(nil, v) }

func Int96(v deprecated.Int96) []byte { return AppendInt96(nil, v) }

func Float(v float32) []byte { return AppendFloat(nil, v) }

func Double(v float64) []byte { return AppendDouble(nil, v) }

func ByteArray(v []byte) []byte { return AppendByteArray(nil, v) }

// AppendBoolean sets the n'th bit of the bitmap b to v, growing b as needed.
func AppendBoolean(b []byte, n int, v bool) []byte {
	i := n / 8
	j := n % 8

	if cap(b) > i {
		b = b[:i+1]
	} else {
		tmp := make([]byte, i+1, 2*(i+1))
		copy(tmp, b)
		b = tmp
	}

	k := uint(j)
	x := byte(0)
	if v {
		x = 1
	}

	b[i] = (b[i] & ^(1 << k)) | (x << k)
	return b
}

func AppendInt32(b []byte, v int32) []byte {
	x := [4]byte{}
	binary.LittleEndian.PutUint32(x[:], uint32(v))
	return append(b, x[:]...)
}

func AppendInt64(b []byte, v int64) []byte {
	x := [8]byte{}
	binary.LittleEndian.PutUint64(x[:], uint64(v))
	return append(b, x[:]...)
}

func AppendInt96(b []byte, v deprecated.Int96) []byte {
	x := [12]byte{}
	binary.LittleEndian.PutUint32(x[0:4], v[0])
	binary.LittleEndian.PutUint32(x[4:8], v[1])
	binary.LittleEndian.PutUint32(x[8:12], v[2])
	return append(b, x[:]...)
}

func AppendFloat(b []byte, v float32) []byte {
	x := [4]byte{}
	binary.LittleEndian.PutUint32(x[:], math.Float32bits(v))
	return append(b, x[:]...)
}

func AppendDouble(b []byte, v float64) []byte {
	x := [8]byte{}
	binary.LittleEndian.PutUint64(x[:], math.Float64bits(v))
	return append(b, x[:]...)
}

// AppendByteArray appends v to b as a 4-byte length prefix followed by the
// raw bytes.
func AppendByteArray(b, v []byte) []byte {
	length := [ByteArrayLengthSize]byte{}
	PutByteArrayLength(length[:], len(v))
	b = append(b, length[:]...)
	b = append(b, v...)
	return b
}

func AppendByteArrayString(b []byte, v string) []byte {
	length := [ByteArrayLengthSize]byte{}
	PutByteArrayLength(length[:], len(v))
	b = append(b, length[:]...)
	b = append(b, v...)
	return b
}

func AppendByteArrayLength(b []byte, n int) []byte {
	length := [ByteArrayLengthSize]byte{}
	PutByteArrayLength(length[:], n)
	return append(b, length[:]...)
}

// ByteArrayLength reads the 4-byte little-endian length prefix at the start
// of b.
func ByteArrayLength(b []byte) int {
	return int(binary.LittleEndian.Uint32(b))
}

func PutByteArrayLength(b []byte, n int) {
	binary.LittleEndian.PutUint32(b, uint32(n))
}

// RangeByteArray calls do for each length-prefixed value in b, stopping at
// the first error.
func RangeByteArray(b []byte, do func([]byte) error) (err error) {
	for len(b) > 0 {
		var v []byte
		if v, b, err = NextByteArray(b); err != nil {
			return err
		}
		if err = do(v); err != nil {
			return err
		}
	}
	return nil
}

// NextByteArray splits the first length-prefixed value off b, returning the
// value and the remainder.
func NextByteArray(b []byte) (v, r []byte, err error) {
	if len(b) < ByteArrayLengthSize {
		return nil, b, ErrTooShort(len(b))
	}
	n := ByteArrayLength(b)
	if n > (len(b) - ByteArrayLengthSize) {
		return nil, b, ErrTooShort(len(b))
	}
	if n > MaxByteArrayLength {
		return nil, b, ErrTooLarge(n)
	}
	n += ByteArrayLengthSize
	return b[ByteArrayLengthSize:n:n], b[n:len(b):len(b)], nil
}

func ErrTooShort(length int) error {
	return fmt.Errorf("input of length %d is too short to contain a PLAIN encoded byte array value: %w", length, io.ErrUnexpectedEOF)
}

func ErrTooLarge(length int) error {
	return fmt.Errorf("byte array of length %d is too large to be encoded", length)
}

================================================
FILE: encoding/plain/plain_test.go
================================================
package plain_test

import (
	"bytes"
	"testing"

	"github.com/segmentio/parquet-go/encoding/plain"
)

// TestAppendBoolean verifies that bits are set LSB-first within each byte.
func TestAppendBoolean(t *testing.T) {
	values := []byte{}
	for i := 0; i < 100; i++ {
		values = plain.AppendBoolean(values, i, (i%2) != 0)
	}
	if !bytes.Equal(values, []byte{
		0b10101010, 0b10101010, 0b10101010, 0b10101010,
		0b10101010, 0b10101010, 0b10101010, 0b10101010,
		0b10101010, 0b10101010, 0b10101010, 0b10101010,
		0b00001010,
	}) {
		t.Errorf("%08b\n", values)
	}
}

================================================
FILE: encoding/rle/dictionary.go
================================================
package rle

import (
	"math/bits"

	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
)

// DictionaryEncoding implements RLE_DICTIONARY: dictionary indexes encoded
// with the hybrid RLE/bit-packed scheme, preceded by a 1-byte bit width.
type DictionaryEncoding struct {
	encoding.NotSupported
}

func (e *DictionaryEncoding) String() string {
	return "RLE_DICTIONARY"
}

func (e *DictionaryEncoding) Encoding() format.Encoding {
	return format.RLEDictionary
}

func (e *DictionaryEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
	bitWidth := maxLenInt32(src)
	dst = append(dst[:0], byte(bitWidth))
	dst, err := encodeInt32(dst, src, uint(bitWidth))
	return dst, e.wrap(err)
}

func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
	if len(src) == 0 {
		return dst[:0], nil
	}
	buf := unsafecast.Int32ToBytes(dst)
	buf, err := decodeInt32(buf[:0], src[1:], uint(src[0]))
	return unsafecast.BytesToInt32(buf), e.wrap(err)
}

func (e *DictionaryEncoding) wrap(err error) error {
	if err != nil {
		err = encoding.Error(e, err)
	}
	return err
}

func clearInt32(data []int32) {
	for i := range data {
		data[i] = 0
	}
}

// maxLenInt32 returns the number of bits needed to represent the largest
// value of data.
func maxLenInt32(data []int32) (max int) {
	for _, v := range data {
		if n := bits.Len32(uint32(v)); n > max {
			max = n
		}
	}
	return max
}

================================================
FILE: encoding/rle/rle.go
================================================
// Package rle implements the hybrid RLE/Bit-Packed encoding employed in
// repetition and definition levels, dictionary indexed data pages, and
// boolean values in the PLAIN encoding.
//
// https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3
package rle

import (
	"encoding/binary"
	"fmt"
	"io"
	"unsafe"

	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
	"github.com/segmentio/parquet-go/internal/bitpack"
	"github.com/segmentio/parquet-go/internal/bytealg"
	"github.com/segmentio/parquet-go/internal/unsafecast"
)

const (
	// This limit is intended to prevent unbounded memory allocations when
	// decoding runs.
	//
	// We use a generous limit which allows for over 16 million values per page
	// if there is only one run to encode the repetition or definition levels
	// (this should be uncommon).
	maxSupportedValueCount = 16 * 1024 * 1024
)

type Encoding struct {
	encoding.NotSupported
	BitWidth int
}

func (e *Encoding) String() string {
	return "RLE"
}

func (e *Encoding) Encoding() format.Encoding {
	return format.RLE
}

func (e *Encoding) EncodeLevels(dst []byte, src []uint8) ([]byte, error) {
	dst, err := encodeBytes(dst[:0], src, uint(e.BitWidth))
	return dst, e.wrap(err)
}

func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) {
	// In the case of encoding boolean values, the 4 bytes length of the
	// output is expected by the parquet format. We add the bytes as placeholder
	// before appending the encoded data.
	dst = append(dst[:0], 0, 0, 0, 0)
	dst, err := encodeBits(dst, src)
	binary.LittleEndian.PutUint32(dst, uint32(len(dst))-4)
	return dst, e.wrap(err)
}

func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
	dst, err := encodeInt32(dst[:0], src, uint(e.BitWidth))
	return dst, e.wrap(err)
}

func (e *Encoding) DecodeLevels(dst []uint8, src []byte) ([]uint8, error) {
	dst, err := decodeBytes(dst[:0], src, uint(e.BitWidth))
	return dst, e.wrap(err)
}

// DecodeBoolean validates and strips the 4-byte length prefix before
// decoding the bit-level runs.
func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, error) {
	if len(src) == 4 {
		return dst[:0], nil
	}
	if len(src) < 4 {
		return dst[:0], fmt.Errorf("input shorter than 4 bytes: %w", io.ErrUnexpectedEOF)
	}
	n := int(binary.LittleEndian.Uint32(src))
	src = src[4:]
	if n > len(src) {
		return dst[:0], fmt.Errorf("input shorter than length prefix: %d < %d: %w", len(src), n, io.ErrUnexpectedEOF)
	}
	dst, err := decodeBits(dst[:0], src[:n])
	return dst, e.wrap(err)
}

func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, error) {
	buf := unsafecast.Int32ToBytes(dst)
	buf, err := decodeInt32(buf[:0], src, uint(e.BitWidth))
	return unsafecast.BytesToInt32(buf), e.wrap(err)
}

func (e *Encoding) wrap(err error) error {
	if err != nil {
		err = encoding.Error(e, err)
	}
	return err
}

// encodeBits encodes a boolean bitmap with the hybrid RLE/bit-packed scheme.
func encodeBits(dst, src []byte) ([]byte, error) {
	if len(src) == 0 || isZero(src) || isOnes(src) {
		dst = appendUvarint(dst, uint64(8*len(src))<<1)
		if len(src) > 0 {
			dst = append(dst, src[0])
		}
		return dst, nil
	}

	for i := 0; i < len(src); {
		j := i + 1

		// Look for contiguous sections of 8 bits, all zeros or ones; these
		// are run-length encoded as it only takes 2 or 3 bytes to store these
		// sequences.
		if src[i] == 0 || src[i] == 0xFF {
			for j < len(src) && src[i] == src[j] {
				j++
			}

			if n := j - i; n > 1 {
				dst = appendRunLengthBits(dst, 8*n, src[i])
				i = j
				continue
			}
		}

		// Sequences of bits that are neither all zeroes or ones are bit-packed,
		// which is a simple copy of the input to the output preceded with the
		// bit-pack header.
for j < len(src) && (src[j-1] != src[j] || (src[j] != 0 && src[j] == 0xFF)) { j++ } if (j-i) > 1 && j < len(src) { j-- } dst = appendBitPackedBits(dst, src[i:j]) i = j } return dst, nil } func encodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) { if bitWidth > 8 { return dst, errEncodeInvalidBitWidth("INT8", bitWidth) } if bitWidth == 0 { if !isZero(src) { return dst, errEncodeInvalidBitWidth("INT8", bitWidth) } return appendUvarint(dst, uint64(len(src))<<1), nil } if len(src) >= 8 { words := unsafe.Slice((*uint64)(unsafe.Pointer(&src[0])), len(src)/8) for i := 0; i < len(words); { j := i pattern := broadcast8x1(words[i]) for j < len(words) && words[j] == pattern { j++ } if i < j { dst = appendRunLengthBytes(dst, 8*(j-i), byte(pattern)) } else { j++ for j < len(words) && words[j] != broadcast8x1(words[j-1]) { j++ } dst = appendBitPackedBytes(dst, words[i:j], bitWidth) } i = j } } for i := (len(src) / 8) * 8; i < len(src); { j := i + 1 for j < len(src) && src[i] == src[j] { j++ } dst = appendRunLengthBytes(dst, j-i, src[i]) i = j } return dst, nil } func encodeInt32(dst []byte, src []int32, bitWidth uint) ([]byte, error) { if bitWidth > 32 { return dst, errEncodeInvalidBitWidth("INT32", bitWidth) } if bitWidth == 0 { if !isZero(unsafecast.Int32ToBytes(src)) { return dst, errEncodeInvalidBitWidth("INT32", bitWidth) } return appendUvarint(dst, uint64(len(src))<<1), nil } if len(src) >= 8 { words := unsafe.Slice((*[8]int32)(unsafe.Pointer(&src[0])), len(src)/8) for i := 0; i < len(words); { j := i pattern := broadcast8x4(words[i][0]) for j < len(words) && words[j] == pattern { j++ } if i < j { dst = appendRunLengthInt32(dst, 8*(j-i), pattern[0], bitWidth) } else { j += 1 j += encodeInt32IndexEqual8Contiguous(words[j:]) dst = appendBitPackedInt32(dst, words[i:j], bitWidth) } i = j } } for i := (len(src) / 8) * 8; i < len(src); { j := i + 1 for j < len(src) && src[i] == src[j] { j++ } dst = appendRunLengthInt32(dst, j-i, src[i], bitWidth) i = j } return dst, nil 
} func decodeBits(dst, src []byte) ([]byte, error) { for i := 0; i < len(src); { u, n := binary.Uvarint(src[i:]) if n == 0 { return dst, fmt.Errorf("decoding run-length block header: %w", io.ErrUnexpectedEOF) } if n < 0 { return dst, fmt.Errorf("overflow after decoding %d/%d bytes of run-length block header", -n+i, len(src)) } i += n count, bitpacked := uint(u>>1), (u&1) != 0 if count > maxSupportedValueCount { return dst, fmt.Errorf("decoded run-length block cannot have more than %d values", maxSupportedValueCount) } if bitpacked { n := int(count) j := i + n if j > len(src) { return dst, fmt.Errorf("decoding bit-packed block of %d values: %w", n, io.ErrUnexpectedEOF) } dst = append(dst, src[i:j]...) i = j } else { word := byte(0) if i < len(src) { word = src[i] i++ } offset := len(dst) length := bitpack.ByteCount(count) dst = resize(dst, offset+length) bytealg.Broadcast(dst[offset:], word) } } return dst, nil } func decodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) { if bitWidth > 8 { return dst, errDecodeInvalidBitWidth("INT8", bitWidth) } for i := 0; i < len(src); { u, n := binary.Uvarint(src[i:]) if n == 0 { return dst, fmt.Errorf("decoding run-length block header: %w", io.ErrUnexpectedEOF) } if n < 0 { return dst, fmt.Errorf("overflow after decoding %d/%d bytes of run-length block header", -n+i, len(src)) } i += n count, bitpacked := uint(u>>1), (u&1) != 0 if count > maxSupportedValueCount { return dst, fmt.Errorf("decoded run-length block cannot have more than %d values", maxSupportedValueCount) } if bitpacked { count *= 8 j := i + bitpack.ByteCount(count*bitWidth) if j > len(src) { return dst, fmt.Errorf("decoding bit-packed block of %d values: %w", 8*count, io.ErrUnexpectedEOF) } offset := len(dst) length := int(count) dst = resize(dst, offset+length) decodeBytesBitpack(dst[offset:], src[i:j], count, bitWidth) i = j } else { if bitWidth != 0 && (i+1) > len(src) { return dst, fmt.Errorf("decoding run-length block of %d values: %w", count, 
io.ErrUnexpectedEOF)
			}
			word := byte(0)
			if bitWidth != 0 {
				word = src[i]
				i++
			}
			offset := len(dst)
			length := int(count)
			dst = resize(dst, offset+length)
			bytealg.Broadcast(dst[offset:], word)
		}
	}
	return dst, nil
}

// decodeInt32 decodes a hybrid RLE/bit-packed stream of 32-bit values into
// dst (a byte buffer holding little-endian int32 values).
func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) {
	if bitWidth > 32 {
		return dst, errDecodeInvalidBitWidth("INT32", bitWidth)
	}

	buf := make([]byte, 2*bitpack.PaddingInt32)

	for i := 0; i < len(src); {
		u, n := binary.Uvarint(src[i:])
		if n == 0 {
			return dst, fmt.Errorf("decoding run-length block header: %w", io.ErrUnexpectedEOF)
		}
		if n < 0 {
			return dst, fmt.Errorf("overflow after decoding %d/%d bytes of run-length block header", -n+i, len(src))
		}
		i += n

		count, bitpacked := uint(u>>1), (u&1) != 0
		if count > maxSupportedValueCount {
			return dst, fmt.Errorf("decoded run-length block cannot have more than %d values", maxSupportedValueCount)
		}
		if bitpacked {
			offset := len(dst)
			// count is in groups of 8 values: each group holds bitWidth bytes
			// of packed input and expands to 8 int32 (32 bytes) of output.
			length := int(count * bitWidth)
			dst = resize(dst, offset+4*8*int(count))

			// The bitpack.UnpackInt32 function requires the input to be padded
			// or the function panics. If there is enough room in the input
			// buffer we can use it, otherwise we have to copy it to a larger
			// location (which should rarely happen).
			//
			// NOTE(review): there is no explicit check that i+length <= len(src)
			// before slicing below; a truncated bit-packed block would panic
			// rather than return an error — confirm against upstream.
			in := src[i : i+length]
			if (cap(in) - len(in)) >= bitpack.PaddingInt32 {
				in = in[:cap(in)]
			} else {
				buf = resize(buf, len(in)+bitpack.PaddingInt32)
				copy(buf, in)
				in = buf
			}

			out := unsafecast.BytesToInt32(dst[offset:])
			bitpack.UnpackInt32(out, in, bitWidth)
			i += length
		} else {
			// Run-length blocks store the repeated value in ceil(bitWidth/8)
			// bytes.
			j := i + bitpack.ByteCount(bitWidth)

			if j > len(src) {
				return dst, fmt.Errorf("decoding run-length block of %d values: %w", count, io.ErrUnexpectedEOF)
			}

			bits := [4]byte{}
			copy(bits[:], src[i:j])
			dst = appendRepeat(dst, bits[:], count)
			i = j
		}
	}

	return dst, nil
}

func errEncodeInvalidBitWidth(typ string, bitWidth uint) error {
	return errInvalidBitWidth("encode", typ, bitWidth)
}

func errDecodeInvalidBitWidth(typ string, bitWidth uint) error {
	return errInvalidBitWidth("decode", typ, bitWidth)
}

func errInvalidBitWidth(op, typ string, bitWidth uint) error {
	return fmt.Errorf("cannot %s %s with invalid bit-width=%d", op, typ, bitWidth)
}

// appendRepeat appends count copies of pattern to dst, doubling the copied
// region on each iteration.
func appendRepeat(dst, pattern []byte, count uint) []byte {
	offset := len(dst)
	length := int(count) * len(pattern)
	dst = resize(dst, offset+length)
	i := offset + copy(dst[offset:], pattern)
	for i < len(dst) {
		i += copy(dst[i:], dst[offset:i])
	}
	return dst
}

func appendUvarint(dst []byte, u uint64) []byte {
	var b [binary.MaxVarintLen64]byte
	var n = binary.PutUvarint(b[:], u)
	return append(dst, b[:n]...)
} func appendRunLengthBits(dst []byte, count int, value byte) []byte { return appendRunLengthBytes(dst, count, value) } func appendBitPackedBits(dst []byte, words []byte) []byte { n := len(dst) dst = resize(dst, n+binary.MaxVarintLen64+len(words)) n += binary.PutUvarint(dst[n:], uint64(len(words)<<1)|1) n += copy(dst[n:], words) return dst[:n] } func appendRunLengthBytes(dst []byte, count int, value byte) []byte { n := len(dst) dst = resize(dst, n+binary.MaxVarintLen64+1) n += binary.PutUvarint(dst[n:], uint64(count)<<1) dst[n] = value return dst[:n+1] } func appendBitPackedBytes(dst []byte, words []uint64, bitWidth uint) []byte { n := len(dst) dst = resize(dst, n+binary.MaxVarintLen64+(len(words)*int(bitWidth))+8) n += binary.PutUvarint(dst[n:], uint64(len(words)<<1)|1) n += encodeBytesBitpack(dst[n:], words, bitWidth) return dst[:n] } func appendRunLengthInt32(dst []byte, count int, value int32, bitWidth uint) []byte { n := len(dst) dst = resize(dst, n+binary.MaxVarintLen64+4) n += binary.PutUvarint(dst[n:], uint64(count)<<1) binary.LittleEndian.PutUint32(dst[n:], uint32(value)) return dst[:n+bitpack.ByteCount(bitWidth)] } func appendBitPackedInt32(dst []byte, words [][8]int32, bitWidth uint) []byte { n := len(dst) dst = resize(dst, n+binary.MaxVarintLen64+(len(words)*int(bitWidth))+32) n += binary.PutUvarint(dst[n:], uint64(len(words))<<1|1) n += encodeInt32Bitpack(dst[n:], words, bitWidth) return dst[:n] } func broadcast8x1(v uint64) uint64 { return (v & 0xFF) * 0x0101010101010101 } func broadcast8x4(v int32) [8]int32 { return [8]int32{v, v, v, v, v, v, v, v} } func isZero(data []byte) bool { return bytealg.Count(data, 0x00) == len(data) } func isOnes(data []byte) bool { return bytealg.Count(data, 0xFF) == len(data) } func resize(buf []byte, size int) []byte { if cap(buf) < size { return grow(buf, size) } return buf[:size] } func grow(buf []byte, size int) []byte { newCap := 2 * cap(buf) if newCap < size { newCap = size } newBuf := make([]byte, size, newCap) 
copy(newBuf, buf) return newBuf } func encodeInt32BitpackDefault(dst []byte, src [][8]int32, bitWidth uint) int { bits := unsafe.Slice((*int32)(unsafe.Pointer(&src[0])), len(src)*8) bitpack.PackInt32(dst, bits, bitWidth) return bitpack.ByteCount(uint(len(src)*8) * bitWidth) } func encodeBytesBitpackDefault(dst []byte, src []uint64, bitWidth uint) int { bitMask := uint64(1<> 8) & bitMask) << (1 * bitWidth)) | (((word >> 16) & bitMask) << (2 * bitWidth)) | (((word >> 24) & bitMask) << (3 * bitWidth)) | (((word >> 32) & bitMask) << (4 * bitWidth)) | (((word >> 40) & bitMask) << (5 * bitWidth)) | (((word >> 48) & bitMask) << (6 * bitWidth)) | (((word >> 56) & bitMask) << (7 * bitWidth)) binary.LittleEndian.PutUint64(dst[n:], word) n += int(bitWidth) } return n } func decodeBytesBitpackDefault(dst, src []byte, count, bitWidth uint) { dst = dst[:0] bitMask := uint64(1< 0; count -= 8 { j := i + byteCount bits := [8]byte{} copy(bits[:], src[i:j]) word := binary.LittleEndian.Uint64(bits[:]) dst = append(dst, byte((word>>(0*bitWidth))&bitMask), byte((word>>(1*bitWidth))&bitMask), byte((word>>(2*bitWidth))&bitMask), byte((word>>(3*bitWidth))&bitMask), byte((word>>(4*bitWidth))&bitMask), byte((word>>(5*bitWidth))&bitMask), byte((word>>(6*bitWidth))&bitMask), byte((word>>(7*bitWidth))&bitMask), ) i = j } } ================================================ FILE: encoding/rle/rle_amd64.go ================================================ //go:build !purego package rle import ( "golang.org/x/sys/cpu" ) var ( encodeInt32IndexEqual8Contiguous func(words [][8]int32) int encodeInt32Bitpack func(dst []byte, src [][8]int32, bitWidth uint) int encodeBytesBitpack func(dst []byte, src []uint64, bitWidth uint) int decodeBytesBitpack func(dst, src []byte, count, bitWidth uint) ) func init() { switch { case cpu.X86.HasAVX2: encodeInt32IndexEqual8Contiguous = encodeInt32IndexEqual8ContiguousAVX2 encodeInt32Bitpack = encodeInt32BitpackAVX2 default: encodeInt32IndexEqual8Contiguous = 
encodeInt32IndexEqual8ContiguousSSE encodeInt32Bitpack = encodeInt32BitpackDefault } switch { case cpu.X86.HasBMI2: encodeBytesBitpack = encodeBytesBitpackBMI2 decodeBytesBitpack = decodeBytesBitpackBMI2 default: encodeBytesBitpack = encodeBytesBitpackDefault decodeBytesBitpack = decodeBytesBitpackDefault } } //go:noescape func encodeBytesBitpackBMI2(dst []byte, src []uint64, bitWidth uint) int //go:noescape func encodeInt32IndexEqual8ContiguousAVX2(words [][8]int32) int //go:noescape func encodeInt32IndexEqual8ContiguousSSE(words [][8]int32) int //go:noescape func encodeInt32Bitpack1to16bitsAVX2(dst []byte, src [][8]int32, bitWidth uint) int func encodeInt32BitpackAVX2(dst []byte, src [][8]int32, bitWidth uint) int { switch { case bitWidth == 0: return 0 case bitWidth <= 16: return encodeInt32Bitpack1to16bitsAVX2(dst, src, bitWidth) default: return encodeInt32BitpackDefault(dst, src, bitWidth) } } //go:noescape func decodeBytesBitpackBMI2(dst, src []byte, count, bitWidth uint) ================================================ FILE: encoding/rle/rle_amd64.s ================================================ //go:build !purego #include "textflag.h" GLOBL bitMasks<>(SB), RODATA|NOPTR, $64 DATA bitMasks<>+0(SB)/8, $0b0000000100000001000000010000000100000001000000010000000100000001 DATA bitMasks<>+8(SB)/8, $0b0000001100000011000000110000001100000011000000110000001100000011 DATA bitMasks<>+16(SB)/8, $0b0000011100000111000001110000011100000111000001110000011100000111 DATA bitMasks<>+24(SB)/8, $0b0000111100001111000011110000111100001111000011110000111100001111 DATA bitMasks<>+32(SB)/8, $0b0001111100011111000111110001111100011111000111110001111100011111 DATA bitMasks<>+40(SB)/8, $0b0011111100111111001111110011111100111111001111110011111100111111 DATA bitMasks<>+48(SB)/8, $0b0111111101111111011111110111111101111111011111110111111101111111 DATA bitMasks<>+56(SB)/8, $0b1111111111111111111111111111111111111111111111111111111111111111 // func decodeBytesBitpackBMI2(dst, src 
[]byte, count, bitWidth uint) TEXT ·decodeBytesBitpackBMI2(SB), NOSPLIT, $0-64 MOVQ dst_base+0(FP), AX MOVQ src_base+24(FP), BX MOVQ count+48(FP), CX MOVQ bitWidth+56(FP), DX LEAQ bitMasks<>(SB), DI MOVQ -8(DI)(DX*8), DI XORQ SI, SI SHRQ $3, CX JMP test loop: MOVQ (BX), R8 PDEPQ DI, R8, R8 MOVQ R8, (AX)(SI*8) ADDQ DX, BX INCQ SI test: CMPQ SI, CX JNE loop RET // func encodeBytesBitpackBMI2(dst []byte, src []uint64, bitWidth uint) int TEXT ·encodeBytesBitpackBMI2(SB), NOSPLIT, $0-64 MOVQ dst_base+0(FP), AX MOVQ src_base+24(FP), BX MOVQ src_len+32(FP), CX MOVQ bitWidth+48(FP), DX LEAQ bitMasks<>(SB), DI MOVQ -8(DI)(DX*8), DI XORQ SI, SI JMP test loop: MOVQ (BX)(SI*8), R8 PEXTQ DI, R8, R8 MOVQ R8, (AX) ADDQ DX, AX INCQ SI test: CMPQ SI, CX JNE loop done: SUBQ dst+0(FP), AX MOVQ AX, ret+56(FP) RET // func encodeInt32IndexEqual8ContiguousAVX2(words [][8]int32) int TEXT ·encodeInt32IndexEqual8ContiguousAVX2(SB), NOSPLIT, $0-32 MOVQ words_base+0(FP), AX MOVQ words_len+8(FP), BX XORQ SI, SI SHLQ $5, BX JMP test loop: VMOVDQU (AX)(SI*1), Y0 VPSHUFD $0, Y0, Y1 VPCMPEQD Y1, Y0, Y0 VMOVMSKPS Y0, CX CMPL CX, $0xFF JE done ADDQ $32, SI test: CMPQ SI, BX JNE loop done: VZEROUPPER SHRQ $5, SI MOVQ SI, ret+24(FP) RET // func encodeInt32IndexEqual8ContiguousSSE(words [][8]int32) int TEXT ·encodeInt32IndexEqual8ContiguousSSE(SB), NOSPLIT, $0-32 MOVQ words_base+0(FP), AX MOVQ words_len+8(FP), BX XORQ SI, SI SHLQ $5, BX JMP test loop: MOVOU (AX)(SI*1), X0 MOVOU 16(AX)(SI*1), X1 PSHUFD $0, X0, X2 PCMPEQL X2, X0 PCMPEQL X2, X1 MOVMSKPS X0, CX MOVMSKPS X1, DX ANDL DX, CX CMPL CX, $0xF JE done ADDQ $32, SI test: CMPQ SI, BX JNE loop done: SHRQ $5, SI MOVQ SI, ret+24(FP) RET // func encodeInt32Bitpack1to16bitsAVX2(dst []byte, src [][8]int32, bitWidth uint) int TEXT ·encodeInt32Bitpack1to16bitsAVX2(SB), NOSPLIT, $0-64 MOVQ dst_base+0(FP), AX MOVQ src_base+24(FP), BX MOVQ src_len+32(FP), CX MOVQ bitWidth+48(FP), DX MOVQ DX, X0 VPBROADCASTQ X0, Y6 // [1*bitWidth...] 
VPSLLQ $1, Y6, Y7 // [2*bitWidth...] VPADDQ Y6, Y7, Y8 // [3*bitWidth...] VPSLLQ $2, Y6, Y9 // [4*bitWidth...] MOVQ $64, DI MOVQ DI, X1 VPBROADCASTQ X1, Y10 VPSUBQ Y6, Y10, Y11 // [64-1*bitWidth...] VPSUBQ Y9, Y10, Y12 // [64-4*bitWidth...] VPCMPEQQ Y4, Y4, Y4 VPSRLVQ Y11, Y4, Y4 VPXOR Y5, Y5, Y5 XORQ SI, SI SHLQ $5, CX JMP test loop: VMOVDQU (BX)(SI*1), Y0 VPSHUFD $0b01010101, Y0, Y1 VPSHUFD $0b10101010, Y0, Y2 VPSHUFD $0b11111111, Y0, Y3 VPAND Y4, Y0, Y0 VPAND Y4, Y1, Y1 VPAND Y4, Y2, Y2 VPAND Y4, Y3, Y3 VPSLLVQ Y6, Y1, Y1 VPSLLVQ Y7, Y2, Y2 VPSLLVQ Y8, Y3, Y3 VPOR Y1, Y0, Y0 VPOR Y3, Y2, Y2 VPOR Y2, Y0, Y0 VPERMQ $0b00001010, Y0, Y1 VPSLLVQ X9, X1, X2 VPSRLQ X12, X1, X3 VBLENDPD $0b10, X3, X2, X1 VBLENDPD $0b10, X5, X0, X0 VPOR X1, X0, X0 VMOVDQU X0, (AX) ADDQ DX, AX ADDQ $32, SI test: CMPQ SI, CX JNE loop VZEROUPPER SUBQ dst+0(FP), AX MOVQ AX, ret+56(FP) RET ================================================ FILE: encoding/rle/rle_amd64_test.go ================================================ //go:build go1.18 && !purego && amd64 // +build go1.18,!purego,amd64 package rle import "testing" func TestEncodeInt32IndexEqual8ContiguousAVX2(t *testing.T) { testEncodeInt32IndexEqual8Contiguous(t, encodeInt32IndexEqual8ContiguousAVX2) } func TestEncodeInt32IndexEqual8ContiguousSSE(t *testing.T) { testEncodeInt32IndexEqual8Contiguous(t, encodeInt32IndexEqual8ContiguousSSE) } func BenchmarkEncodeInt32IndexEqual8ContiguousAVX2(b *testing.B) { benchmarkEncodeInt32IndexEqual8Contiguous(b, encodeInt32IndexEqual8ContiguousAVX2) } func BenchmarkEncodeInt32IndexEqual8ContiguousSSE(b *testing.B) { benchmarkEncodeInt32IndexEqual8Contiguous(b, encodeInt32IndexEqual8ContiguousSSE) } ================================================ FILE: encoding/rle/rle_purego.go ================================================ //go:build purego || !amd64 package rle func encodeBytesBitpack(dst []byte, src []uint64, bitWidth uint) int { return encodeBytesBitpackDefault(dst, src, bitWidth) } func 
encodeInt32IndexEqual8Contiguous(words [][8]int32) (n int) { for n < len(words) && words[n] != broadcast8x4(words[n][0]) { n++ } return n } func encodeInt32Bitpack(dst []byte, src [][8]int32, bitWidth uint) int { return encodeInt32BitpackDefault(dst, src, bitWidth) } func decodeBytesBitpack(dst, src []byte, count, bitWidth uint) { decodeBytesBitpackDefault(dst, src, count, bitWidth) } ================================================ FILE: encoding/rle/rle_test.go ================================================ //go:build go1.18 // +build go1.18 package rle import ( "testing" "github.com/segmentio/parquet-go/encoding/fuzz" "github.com/segmentio/parquet-go/internal/quick" ) func FuzzEncodeBoolean(f *testing.F) { fuzz.EncodeBoolean(f, &Encoding{BitWidth: 1}) } func FuzzEncodeLevels(f *testing.F) { fuzz.EncodeLevels(f, &Encoding{BitWidth: 8}) } func FuzzEncodeInt32(f *testing.F) { fuzz.EncodeInt32(f, &Encoding{BitWidth: 32}) } func TestEncodeInt32IndexEqual8Contiguous(t *testing.T) { testEncodeInt32IndexEqual8Contiguous(t, encodeInt32IndexEqual8Contiguous) } func testEncodeInt32IndexEqual8Contiguous(t *testing.T, f func([][8]int32) int) { t.Helper() err := quick.Check(func(words [][8]int32) bool { want := 0 for want < len(words) && words[want] != broadcast8x4(words[want][0]) { want++ } if got := f(words); got != want { t.Errorf("want=%d got=%d", want, got) return false } return true }) if err != nil { t.Error(err) } } func BenchmarkEncodeInt32IndexEqual8Contiguous(b *testing.B) { benchmarkEncodeInt32IndexEqual8Contiguous(b, encodeInt32IndexEqual8Contiguous) } func benchmarkEncodeInt32IndexEqual8Contiguous(b *testing.B, f func([][8]int32) int) { words := make([][8]int32, 1000) for i := range words { words[i][0] = 1 } for i := 0; i < b.N; i++ { _ = f(words) } b.SetBytes(32 * int64(len(words))) } ================================================ FILE: encoding/rle/testdata/fuzz/FuzzEncodeBoolean/6be5e340694798c2e5b94c758f0262edd2edf8af5795d4c6c60f6e02643bbb96 
================================================ go test fuzz v1 []byte("0\x00\x00") int64(93) ================================================ FILE: encoding/rle/testdata/fuzz/FuzzEncodeBoolean/9772b3f21a6f61810fe38d120bcc9da6d78540f22dc819a4201283608671fdf4 ================================================ go test fuzz v1 []byte("00000001") int64(0) ================================================ FILE: encoding/rle/testdata/fuzz/FuzzEncodeInt32/06ba4bdb19de593e669c642987e270fe2488d4d58ecd712db136a3e011071253 ================================================ go test fuzz v1 []byte("0000") int64(0) ================================================ FILE: encoding/rle/testdata/fuzz/FuzzEncodeLevels/0468684de48f926219bfc47be13ddf085b5a0ed9fbd9c40a005641b253e88d33 ================================================ go test fuzz v1 []byte("\xba\xba\xba\xba0\xba\xba\xba\xba\xba\xba") int64(0) ================================================ FILE: encoding/test/test_go17.go ================================================ //go:build !go1.17 package test ================================================ FILE: encoding/test/test_go18.go ================================================ //go:build go1.18 package test import ( "fmt" "testing" "github.com/segmentio/parquet-go/encoding" ) func EncodeInt32(t *testing.T, enc encoding.Encoding, min, max int, bitWidth uint) { t.Helper() encode(t, enc, min, max, encoding.Encoding.EncodeInt32, encoding.Encoding.DecodeInt32, func(i int) int32 { value := int32(i) mask := int32((1 << bitWidth) - 1) if (i % 2) != 0 { value = -value } return value & mask }, ) } func EncodeInt64(t *testing.T, enc encoding.Encoding, min, max int, bitWidth uint) { t.Helper() encode(t, enc, min, max, encoding.Encoding.EncodeInt64, encoding.Encoding.DecodeInt64, func(i int) int64 { value := int64(i) mask := int64((1 << bitWidth) - 1) if (i % 2) != 0 { value = -value } return value & mask }, ) } func EncodeFloat(t *testing.T, enc encoding.Encoding, min, max int) { 
t.Helper() encode(t, enc, min, max, encoding.Encoding.EncodeFloat, encoding.Encoding.DecodeFloat, func(i int) float32 { return float32(i) }, ) } func EncodeDouble(t *testing.T, enc encoding.Encoding, min, max int) { t.Helper() encode(t, enc, min, max, encoding.Encoding.EncodeDouble, encoding.Encoding.DecodeDouble, func(i int) float64 { return float64(i) }, ) } type encodingFunc[T comparable] func(encoding.Encoding, []byte, []T) ([]byte, error) type decodingFunc[T comparable] func(encoding.Encoding, []T, []byte) ([]T, error) func encode[T comparable](t *testing.T, enc encoding.Encoding, min, max int, encode encodingFunc[T], decode decodingFunc[T], valueOf func(int) T) { t.Helper() for k := min; k <= max; k++ { t.Run(fmt.Sprintf("N=%d", k), func(t *testing.T) { src := make([]T, k) for i := range src { src[i] = valueOf(i) } buf, err := encode(enc, nil, src) if err != nil { t.Fatalf("encoding %d values: %v", k, err) } res, err := decode(enc, nil, buf) if err != nil { t.Fatalf("decoding %d values: %v", k, err) } if err := assertEqual(src, res); err != nil { t.Fatalf("testing %d values: %v", k, err) } }) } } func assertEqual[T comparable](want, got []T) error { if len(want) != len(got) { return fmt.Errorf("number of values mismatch: want=%d got=%d", len(want), len(got)) } for i := range want { if want[i] != got[i] { return fmt.Errorf("values at index %d/%d mismatch: want=%+v got=%+v", i, len(want), want[i], got[i]) } } return nil } ================================================ FILE: encoding/values.go ================================================ package encoding import ( "fmt" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/internal/unsafecast" ) type Kind int32 const ( Undefined Kind = iota Boolean Int32 Int64 Int96 Float Double ByteArray FixedLenByteArray ) func (kind Kind) String() string { switch kind { case Boolean: return "BOOLEAN" case Int32: return "INT32" case Int64: return "INT64" case Int96: return "INT96" case Float: 
return "FLOAT"
case Double:
	return "DOUBLE"
case ByteArray:
	return "BYTE_ARRAY"
case FixedLenByteArray:
	return "FIXED_LEN_BYTE_ARRAY"
default:
	return "UNDEFINED"
}
}

// Values is a view over a sequence of parquet values sharing one physical
// type. The layout is columnar: data holds the raw bytes, offsets is used by
// the byte-array kind (see ByteArray), and size is the element size used by
// the fixed-length byte-array kind.
type Values struct {
	kind    Kind
	size    int32
	data    []byte
	offsets []uint32
}

// assertKind panics if the values do not have the requested kind.
func (v *Values) assertKind(kind Kind) {
	if kind != v.kind {
		panic(fmt.Sprintf("cannot convert values of type %s to type %s", v.kind, kind))
	}
}

// assertSize panics if the fixed-length element size differs from size.
func (v *Values) assertSize(size int) {
	if size != int(v.size) {
		panic(fmt.Sprintf("cannot convert values of size %d to size %d", v.size, size))
	}
}

// Size returns the byte size of the underlying data.
func (v *Values) Size() int64 {
	return int64(len(v.data))
}

// Kind returns the physical type of the values.
func (v *Values) Kind() Kind {
	return v.kind
}

// Data returns the raw bytes and offsets without any kind check.
func (v *Values) Data() (data []byte, offsets []uint32) {
	return v.data, v.offsets
}

// Boolean returns the values as raw bytes; panics if the kind is not Boolean.
func (v *Values) Boolean() []byte {
	v.assertKind(Boolean)
	return v.data
}

// Int32 reinterprets the data as []int32; panics if the kind is not Int32.
func (v *Values) Int32() []int32 {
	v.assertKind(Int32)
	return unsafecast.BytesToInt32(v.data)
}

// Int64 reinterprets the data as []int64; panics if the kind is not Int64.
func (v *Values) Int64() []int64 {
	v.assertKind(Int64)
	return unsafecast.BytesToInt64(v.data)
}

// Int96 reinterprets the data as []deprecated.Int96; panics if the kind is
// not Int96.
func (v *Values) Int96() []deprecated.Int96 {
	v.assertKind(Int96)
	return deprecated.BytesToInt96(v.data)
}

// Float reinterprets the data as []float32; panics if the kind is not Float.
func (v *Values) Float() []float32 {
	v.assertKind(Float)
	return unsafecast.BytesToFloat32(v.data)
}

// Double reinterprets the data as []float64; panics if the kind is not Double.
func (v *Values) Double() []float64 {
	v.assertKind(Double)
	return unsafecast.BytesToFloat64(v.data)
}

// ByteArray returns the raw bytes and element offsets; panics if the kind is
// not ByteArray.
func (v *Values) ByteArray() (data []byte, offsets []uint32) {
	v.assertKind(ByteArray)
	return v.data, v.offsets
}

// FixedLenByteArray returns the raw bytes and the fixed element size; panics
// if the kind is not FixedLenByteArray.
func (v *Values) FixedLenByteArray() (data []byte, size int) {
	v.assertKind(FixedLenByteArray)
	return v.data, int(v.size)
}

// Uint32 reinterprets Int32 data as unsigned; panics if the kind is not Int32.
func (v *Values) Uint32() []uint32 {
	v.assertKind(Int32)
	return unsafecast.BytesToUint32(v.data)
}

// Uint64 reinterprets Int64 data as unsigned; panics if the kind is not Int64.
func (v *Values) Uint64() []uint64 {
	v.assertKind(Int64)
	return unsafecast.BytesToUint64(v.data)
}

// Uint128 reinterprets the data as 16-byte elements; panics unless the kind
// is FixedLenByteArray with size 16.
func (v *Values) Uint128() [][16]byte {
	v.assertKind(FixedLenByteArray)
	v.assertSize(16)
	return unsafecast.BytesToUint128(v.data)
}

// BooleanValues constructs a Values view over boolean data.
func BooleanValues(values []byte) Values {
	return Values{
		kind: Boolean,
		data: values,
	}
}

// Int32Values constructs a Values view over int32 data.
func Int32Values(values []int32) Values
{
	return Values{
		kind: Int32,
		data: unsafecast.Int32ToBytes(values),
	}
}

// Int64Values constructs a Values view over int64 data.
func Int64Values(values []int64) Values {
	return Values{
		kind: Int64,
		data: unsafecast.Int64ToBytes(values),
	}
}

// Int96Values constructs a Values view over int96 data.
func Int96Values(values []deprecated.Int96) Values {
	return Values{
		kind: Int96,
		data: deprecated.Int96ToBytes(values),
	}
}

// FloatValues constructs a Values view over float32 data.
func FloatValues(values []float32) Values {
	return Values{
		kind: Float,
		data: unsafecast.Float32ToBytes(values),
	}
}

// DoubleValues constructs a Values view over float64 data.
func DoubleValues(values []float64) Values {
	return Values{
		kind: Double,
		data: unsafecast.Float64ToBytes(values),
	}
}

// ByteArrayValues constructs a Values view over variable-length byte arrays.
func ByteArrayValues(values []byte, offsets []uint32) Values {
	return Values{
		kind:    ByteArray,
		data:    values,
		offsets: offsets,
	}
}

// FixedLenByteArrayValues constructs a Values view over fixed-length byte
// arrays of the given element size.
func FixedLenByteArrayValues(values []byte, size int) Values {
	return Values{
		kind: FixedLenByteArray,
		size: int32(size),
		data: values,
	}
}

// Uint32Values constructs a Values view over uint32 data (stored as Int32).
func Uint32Values(values []uint32) Values {
	return Int32Values(unsafecast.Uint32ToInt32(values))
}

// Uint64Values constructs a Values view over uint64 data (stored as Int64).
func Uint64Values(values []uint64) Values {
	return Int64Values(unsafecast.Uint64ToInt64(values))
}

// Uint128Values constructs a Values view over 16-byte elements (stored as
// FixedLenByteArray of size 16).
func Uint128Values(values [][16]byte) Values {
	return FixedLenByteArrayValues(unsafecast.Uint128ToBytes(values), 16)
}

// Int32ValuesFromBytes wraps raw bytes as Int32 values without conversion.
func Int32ValuesFromBytes(values []byte) Values {
	return Values{
		kind: Int32,
		data: values,
	}
}

// Int64ValuesFromBytes wraps raw bytes as Int64 values without conversion.
func Int64ValuesFromBytes(values []byte) Values {
	return Values{
		kind: Int64,
		data: values,
	}
}

// Int96ValuesFromBytes wraps raw bytes as Int96 values without conversion.
func Int96ValuesFromBytes(values []byte) Values {
	return Values{
		kind: Int96,
		data: values,
	}
}

// FloatValuesFromBytes wraps raw bytes as Float values without conversion.
func FloatValuesFromBytes(values []byte) Values {
	return Values{
		kind: Float,
		data: values,
	}
}

// DoubleValuesFromBytes wraps raw bytes as Double values without conversion.
func DoubleValuesFromBytes(values []byte) Values {
	return Values{
		kind: Double,
		data: values,
	}
}

// EncodeBoolean encodes boolean values with enc, appending to dst.
func EncodeBoolean(dst []byte, src Values, enc Encoding) ([]byte, error) {
	return enc.EncodeBoolean(dst, src.Boolean())
}

// EncodeInt32 encodes int32 values with enc, appending to dst.
func EncodeInt32(dst []byte, src Values, enc Encoding) ([]byte, error) {
	return enc.EncodeInt32(dst, src.Int32())
}

// EncodeInt64 encodes int64 values with enc, appending to dst.
func EncodeInt64(dst []byte, src Values, enc Encoding) ([]byte, error) {
	return enc.EncodeInt64(dst, src.Int64())
}

// EncodeInt96 encodes int96 values with enc, appending to dst.
func EncodeInt96(dst []byte, src Values, enc Encoding) ([]byte, error) {
	return enc.EncodeInt96(dst, src.Int96())
}

// EncodeFloat encodes float32 values with enc, appending to dst.
func EncodeFloat(dst []byte, src Values, enc Encoding) ([]byte, error) {
	return enc.EncodeFloat(dst, src.Float())
}

// EncodeDouble encodes float64 values with enc, appending to dst.
func EncodeDouble(dst []byte, src Values, enc Encoding) ([]byte, error) {
	return enc.EncodeDouble(dst, src.Double())
}

// EncodeByteArray encodes variable-length byte arrays with enc.
func EncodeByteArray(dst []byte, src Values, enc Encoding) ([]byte, error) {
	values, offsets := src.ByteArray()
	return enc.EncodeByteArray(dst, values, offsets)
}

// EncodeFixedLenByteArray encodes fixed-length byte arrays with enc.
func EncodeFixedLenByteArray(dst []byte, src Values, enc Encoding) ([]byte, error) {
	data, size := src.FixedLenByteArray()
	return enc.EncodeFixedLenByteArray(dst, data, size)
}

// DecodeBoolean decodes boolean values from src, reusing dst's storage.
func DecodeBoolean(dst Values, src []byte, enc Encoding) (Values, error) {
	values, err := enc.DecodeBoolean(dst.Boolean(), src)
	return BooleanValues(values), err
}

// DecodeInt32 decodes int32 values from src, reusing dst's storage.
func DecodeInt32(dst Values, src []byte, enc Encoding) (Values, error) {
	values, err := enc.DecodeInt32(dst.Int32(), src)
	return Int32Values(values), err
}

// DecodeInt64 decodes int64 values from src, reusing dst's storage.
func DecodeInt64(dst Values, src []byte, enc Encoding) (Values, error) {
	values, err := enc.DecodeInt64(dst.Int64(), src)
	return Int64Values(values), err
}

// DecodeInt96 decodes int96 values from src, reusing dst's storage.
func DecodeInt96(dst Values, src []byte, enc Encoding) (Values, error) {
	values, err := enc.DecodeInt96(dst.Int96(), src)
	return Int96Values(values), err
}

// DecodeFloat decodes float32 values from src, reusing dst's storage.
func DecodeFloat(dst Values, src []byte, enc Encoding) (Values, error) {
	values, err := enc.DecodeFloat(dst.Float(), src)
	return FloatValues(values), err
}

// DecodeDouble decodes float64 values from src, reusing dst's storage.
func DecodeDouble(dst Values, src []byte, enc Encoding) (Values, error) {
	values, err := enc.DecodeDouble(dst.Double(), src)
	return DoubleValues(values), err
}

// DecodeByteArray decodes variable-length byte arrays from src, reusing
// dst's storage and offsets.
func DecodeByteArray(dst Values, src []byte, enc Encoding) (Values, error) {
	values, offsets := dst.ByteArray()
	values, offsets, err := enc.DecodeByteArray(values, src, offsets)
	return ByteArrayValues(values, offsets), err
}

// DecodeFixedLenByteArray decodes fixed-length byte arrays from src, reusing
// dst's storage and element size.
func DecodeFixedLenByteArray(dst Values, src []byte, enc Encoding) (Values, error) {
	data, size := dst.FixedLenByteArray()
	values, err :=
enc.DecodeFixedLenByteArray(data, src, size) return FixedLenByteArrayValues(values, size), err } ================================================ FILE: encoding/values_test.go ================================================ package encoding_test import ( "testing" "unsafe" "github.com/segmentio/parquet-go/encoding" ) func TestValuesSize(t *testing.T) { t.Log(unsafe.Sizeof(encoding.Values{})) } ================================================ FILE: encoding.go ================================================ package parquet import ( "math/bits" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/encoding/bitpacked" "github.com/segmentio/parquet-go/encoding/bytestreamsplit" "github.com/segmentio/parquet-go/encoding/delta" "github.com/segmentio/parquet-go/encoding/plain" "github.com/segmentio/parquet-go/encoding/rle" "github.com/segmentio/parquet-go/format" ) var ( // Plain is the default parquet encoding. Plain plain.Encoding // RLE is the hybrid bit-pack/run-length parquet encoding. RLE rle.Encoding // BitPacked is the deprecated bit-packed encoding for repetition and // definition levels. BitPacked bitpacked.Encoding // PlainDictionary is the plain dictionary parquet encoding. // // This encoding should not be used anymore in parquet 2.0 and later, // it is implemented for backwards compatibility to support reading // files that were encoded with older parquet libraries. PlainDictionary plain.DictionaryEncoding // RLEDictionary is the RLE dictionary parquet encoding. RLEDictionary rle.DictionaryEncoding // DeltaBinaryPacked is the delta binary packed parquet encoding. DeltaBinaryPacked delta.BinaryPackedEncoding // DeltaLengthByteArray is the delta length byte array parquet encoding. DeltaLengthByteArray delta.LengthByteArrayEncoding // DeltaByteArray is the delta byte array parquet encoding. DeltaByteArray delta.ByteArrayEncoding // ByteStreamSplit is an encoding for floating-point data. 
ByteStreamSplit bytestreamsplit.Encoding

// Table indexing the encodings supported by this package.
encodings = [...]encoding.Encoding{
	format.Plain:                &Plain,
	format.PlainDictionary:      &PlainDictionary,
	format.BitPacked:            &BitPacked,
	format.RLE:                  &RLE,
	format.RLEDictionary:        &RLEDictionary,
	format.DeltaBinaryPacked:    &DeltaBinaryPacked,
	format.DeltaLengthByteArray: &DeltaLengthByteArray,
	format.DeltaByteArray:       &DeltaByteArray,
	format.ByteStreamSplit:      &ByteStreamSplit,
}

// Table indexing RLE encodings for repetition and definition levels of
// all supported bit widths.
levelEncodingsRLE = [...]rle.Encoding{
	0: {BitWidth: 1},
	1: {BitWidth: 2},
	2: {BitWidth: 3},
	3: {BitWidth: 4},
	4: {BitWidth: 5},
	5: {BitWidth: 6},
	6: {BitWidth: 7},
	7: {BitWidth: 8},
}

// Same table for the deprecated BIT_PACKED level encoding.
levelEncodingsBitPacked = [...]bitpacked.Encoding{
	0: {BitWidth: 1},
	1: {BitWidth: 2},
	2: {BitWidth: 3},
	3: {BitWidth: 4},
	4: {BitWidth: 5},
	5: {BitWidth: 6},
	6: {BitWidth: 7},
	7: {BitWidth: 8},
}
)

// isDictionaryEncoding reports whether encoding is one of the dictionary
// encodings.
func isDictionaryEncoding(encoding encoding.Encoding) bool {
	return isDictionaryFormat(encoding.Encoding())
}

// isDictionaryFormat reports whether the format code denotes a dictionary
// encoding (plain or RLE dictionary).
func isDictionaryFormat(encoding format.Encoding) bool {
	return encoding == format.PlainDictionary || encoding == format.RLEDictionary
}

// LookupEncoding returns the parquet encoding associated with the given code.
//
// The function never returns nil. If the encoding is not supported,
// encoding.NotSupported is returned.
func LookupEncoding(enc format.Encoding) encoding.Encoding { if enc >= 0 && int(enc) < len(encodings) { if e := encodings[enc]; e != nil { return e } } return encoding.NotSupported{} } func lookupLevelEncoding(enc format.Encoding, max byte) encoding.Encoding { i := bits.Len8(max) - 1 switch enc { case format.RLE: return &levelEncodingsRLE[i] case format.BitPacked: return &levelEncodingsBitPacked[i] default: return encoding.NotSupported{} } } func canEncode(e encoding.Encoding, k Kind) bool { if isDictionaryEncoding(e) { return true } switch k { case Boolean: return encoding.CanEncodeBoolean(e) case Int32: return encoding.CanEncodeInt32(e) case Int64: return encoding.CanEncodeInt64(e) case Int96: return encoding.CanEncodeInt96(e) case Float: return encoding.CanEncodeFloat(e) case Double: return encoding.CanEncodeDouble(e) case ByteArray: return encoding.CanEncodeByteArray(e) case FixedLenByteArray: return encoding.CanEncodeFixedLenByteArray(e) default: return false } } ================================================ FILE: errors.go ================================================ package parquet import ( "errors" "fmt" ) var ( // ErrCorrupted is an error returned by the Err method of ColumnPages // instances when they encountered a mismatch between the CRC checksum // recorded in a page header and the one computed while reading the page // data. ErrCorrupted = errors.New("corrupted parquet page") // ErrMissingRootColumn is an error returned when opening an invalid parquet // file which does not have a root column. ErrMissingRootColumn = errors.New("parquet file is missing a root column") // ErrRowGroupSchemaMissing is an error returned when attempting to write a // row group but the source has no schema. ErrRowGroupSchemaMissing = errors.New("cannot write rows to a row group which has no schema") // ErrRowGroupSchemaMismatch is an error returned when attempting to write a // row group but the source and destination schemas differ. 
ErrRowGroupSchemaMismatch = errors.New("cannot write row groups with mismatching schemas")

// ErrRowGroupSortingColumnsMismatch is an error returned when attempting to
// write a row group but the sorting columns differ in the source and
// destination.
ErrRowGroupSortingColumnsMismatch = errors.New("cannot write row groups with mismatching sorting columns")

// ErrSeekOutOfRange is an error returned when seeking to a row index which
// is less than the first row of a page.
ErrSeekOutOfRange = errors.New("seek to row index out of page range")

// ErrUnexpectedDictionaryPage is an error returned when a page reader
// encounters a dictionary page after the first page, or in a column
// which does not use a dictionary encoding.
ErrUnexpectedDictionaryPage = errors.New("unexpected dictionary page")

// ErrMissingPageHeader is an error returned when a page reader encounters
// a malformed page header which is missing page-type-specific information.
ErrMissingPageHeader = errors.New("missing page header")

// ErrUnexpectedRepetitionLevels is an error returned when attempting to
// decode repetition levels into a page which is not part of a repeated
// column.
ErrUnexpectedRepetitionLevels = errors.New("unexpected repetition levels")

// ErrUnexpectedDefinitionLevels is an error returned when attempting to
// decode definition levels into a page which is part of a required column.
ErrUnexpectedDefinitionLevels = errors.New("unexpected definition levels")

// ErrTooManyRowGroups is returned when attempting to generate a parquet
// file with more than MaxRowGroups row groups.
ErrTooManyRowGroups = errors.New("the limit of 32767 row groups has been reached")

// ErrInvalidConversion is used to indicate that a conversion between two
// values cannot be done because there are no rules to translate between
// their physical types.
ErrInvalidConversion = errors.New("invalid conversion between parquet values") ) type errno int const ( ok errno = iota indexOutOfBounds ) func (e errno) check() { switch e { case ok: case indexOutOfBounds: panic("index out of bounds") default: panic("BUG: unknown error code") } } func errRowIndexOutOfBounds(rowIndex, rowCount int64) error { return fmt.Errorf("row index out of bounds: %d/%d", rowIndex, rowCount) } ================================================ FILE: example_test.go ================================================ package parquet_test import ( "fmt" "io" "io/ioutil" "log" "os" "github.com/segmentio/parquet-go" ) func Example() { // parquet-go uses the same struct-tag definition style as JSON and XML type Contact struct { Name string `parquet:"name"` // "zstd" specifies the compression for this column PhoneNumber string `parquet:"phoneNumber,optional,zstd"` } type AddressBook struct { Owner string `parquet:"owner,zstd"` OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"` Contacts []Contact `parquet:"contacts"` } f, _ := ioutil.TempFile("", "parquet-example-") writer := parquet.NewWriter(f) rows := []AddressBook{ {Owner: "UserA", Contacts: []Contact{ {Name: "Alice", PhoneNumber: "+15505551234"}, {Name: "Bob"}, }}, // Add more rows here. } for _, row := range rows { if err := writer.Write(row); err != nil { log.Fatal(err) } } _ = writer.Close() _ = f.Close() // Now, we can read from the file. 
rf, _ := os.Open(f.Name()) pf := parquet.NewReader(rf) addrs := make([]AddressBook, 0) for { var addr AddressBook err := pf.Read(&addr) if err == io.EOF { break } if err != nil { log.Fatal(err) } addrs = append(addrs, addr) } fmt.Println(addrs[0].Owner) // Output: UserA } ================================================ FILE: file.go ================================================ package parquet import ( "bufio" "encoding/binary" "fmt" "hash/crc32" "io" "sort" "strings" "sync" "github.com/segmentio/encoding/thrift" "github.com/segmentio/parquet-go/format" ) const ( defaultDictBufferSize = 8192 defaultReadBufferSize = 4096 ) // File represents a parquet file. The layout of a Parquet file can be found // here: https://github.com/apache/parquet-format#file-format type File struct { metadata format.FileMetaData protocol thrift.CompactProtocol reader io.ReaderAt size int64 schema *Schema root *Column columnIndexes []format.ColumnIndex offsetIndexes []format.OffsetIndex rowGroups []RowGroup config *FileConfig } // OpenFile opens a parquet file and reads the content between offset 0 and the given // size in r. // // Only the parquet magic bytes and footer are read, column chunks and other // parts of the file are left untouched; this means that successfully opening // a file does not validate that the pages have valid checksums. func OpenFile(r io.ReaderAt, size int64, options ...FileOption) (*File, error) { b := make([]byte, 8) c, err := NewFileConfig(options...) 
if err != nil { return nil, err } f := &File{reader: r, size: size, config: c} if _, err := r.ReadAt(b[:4], 0); err != nil { return nil, fmt.Errorf("reading magic header of parquet file: %w", err) } if string(b[:4]) != "PAR1" { return nil, fmt.Errorf("invalid magic header of parquet file: %q", b[:4]) } if cast, ok := f.reader.(interface{ SetMagicFooterSection(offset, length int64) }); ok { cast.SetMagicFooterSection(size-8, 8) } if n, err := r.ReadAt(b[:8], size-8); n != 8 { return nil, fmt.Errorf("reading magic footer of parquet file: %w", err) } if string(b[4:8]) != "PAR1" { return nil, fmt.Errorf("invalid magic footer of parquet file: %q", b[4:8]) } footerSize := int64(binary.LittleEndian.Uint32(b[:4])) footerData := make([]byte, footerSize) if cast, ok := f.reader.(interface{ SetFooterSection(offset, length int64) }); ok { cast.SetFooterSection(size-(footerSize+8), footerSize) } if _, err := f.reader.ReadAt(footerData, size-(footerSize+8)); err != nil { return nil, fmt.Errorf("reading footer of parquet file: %w", err) } if err := thrift.Unmarshal(&f.protocol, footerData, &f.metadata); err != nil { return nil, fmt.Errorf("reading parquet file metadata: %w", err) } if len(f.metadata.Schema) == 0 { return nil, ErrMissingRootColumn } if !c.SkipPageIndex { if f.columnIndexes, f.offsetIndexes, err = f.ReadPageIndex(); err != nil { return nil, fmt.Errorf("reading page index of parquet file: %w", err) } } if f.root, err = openColumns(f); err != nil { return nil, fmt.Errorf("opening columns of parquet file: %w", err) } var schema *Schema if c.Schema != nil { schema = c.Schema } else { schema = NewSchema(f.root.Name(), f.root) } columns := make([]*Column, 0, numLeafColumnsOf(f.root)) f.schema = schema f.root.forEachLeaf(func(c *Column) { columns = append(columns, c) }) rowGroups := make([]fileRowGroup, len(f.metadata.RowGroups)) for i := range rowGroups { rowGroups[i].init(f, schema, columns, &f.metadata.RowGroups[i]) } f.rowGroups = make([]RowGroup, len(rowGroups)) for 
i := range rowGroups { f.rowGroups[i] = &rowGroups[i] } if !c.SkipBloomFilters { section := io.NewSectionReader(r, 0, size) rbuf, rbufpool := getBufioReader(section, c.ReadBufferSize) defer putBufioReader(rbuf, rbufpool) header := format.BloomFilterHeader{} compact := thrift.CompactProtocol{} decoder := thrift.NewDecoder(compact.NewReader(rbuf)) for i := range rowGroups { g := &rowGroups[i] for j := range g.columns { c := g.columns[j].(*fileColumnChunk) if offset := c.chunk.MetaData.BloomFilterOffset; offset > 0 { section.Seek(offset, io.SeekStart) rbuf.Reset(section) header = format.BloomFilterHeader{} if err := decoder.Decode(&header); err != nil { return nil, fmt.Errorf("decoding bloom filter header: %w", err) } offset, _ = section.Seek(0, io.SeekCurrent) offset -= int64(rbuf.Buffered()) if cast, ok := r.(interface{ SetBloomFilterSection(offset, length int64) }); ok { bloomFilterOffset := c.chunk.MetaData.BloomFilterOffset bloomFilterLength := (offset - bloomFilterOffset) + int64(header.NumBytes) cast.SetBloomFilterSection(bloomFilterOffset, bloomFilterLength) } c.bloomFilter = newBloomFilter(r, offset, &header) } } } } sortKeyValueMetadata(f.metadata.KeyValueMetadata) return f, nil } // ReadPageIndex reads the page index section of the parquet file f. // // If the file did not contain a page index, the method returns two empty slices // and a nil error. // // Only leaf columns have indexes, the returned indexes are arranged using the // following layout: // // ------------------ // | col 0: chunk 0 | // ------------------ // | col 1: chunk 0 | // ------------------ // | ... | // ------------------ // | col 0: chunk 1 | // ------------------ // | col 1: chunk 1 | // ------------------ // | ... | // ------------------ // // This method is useful in combination with the SkipPageIndex option to delay // reading the page index section until after the file was opened. 
Note that in // this case the page index is not cached within the file, programs are expected // to make use of independently from the parquet package. func (f *File) ReadPageIndex() ([]format.ColumnIndex, []format.OffsetIndex, error) { if len(f.metadata.RowGroups) == 0 { return nil, nil, nil } columnIndexOffset := f.metadata.RowGroups[0].Columns[0].ColumnIndexOffset offsetIndexOffset := f.metadata.RowGroups[0].Columns[0].OffsetIndexOffset columnIndexLength := int64(0) offsetIndexLength := int64(0) forEachColumnChunk := func(do func(int, int, *format.ColumnChunk) error) error { for i := range f.metadata.RowGroups { for j := range f.metadata.RowGroups[i].Columns { c := &f.metadata.RowGroups[i].Columns[j] if err := do(i, j, c); err != nil { return err } } } return nil } forEachColumnChunk(func(_, _ int, c *format.ColumnChunk) error { columnIndexLength += int64(c.ColumnIndexLength) offsetIndexLength += int64(c.OffsetIndexLength) return nil }) if columnIndexLength == 0 && offsetIndexLength == 0 { return nil, nil, nil } numRowGroups := len(f.metadata.RowGroups) numColumns := len(f.metadata.RowGroups[0].Columns) numColumnChunks := numRowGroups * numColumns columnIndexes := make([]format.ColumnIndex, numColumnChunks) offsetIndexes := make([]format.OffsetIndex, numColumnChunks) indexBuffer := make([]byte, max(int(columnIndexLength), int(offsetIndexLength))) if columnIndexOffset > 0 { columnIndexData := indexBuffer[:columnIndexLength] if cast, ok := f.reader.(interface{ SetColumnIndexSection(offset, length int64) }); ok { cast.SetColumnIndexSection(columnIndexOffset, columnIndexLength) } if _, err := f.reader.ReadAt(columnIndexData, columnIndexOffset); err != nil { return nil, nil, fmt.Errorf("reading %d bytes column index at offset %d: %w", columnIndexLength, columnIndexOffset, err) } err := forEachColumnChunk(func(i, j int, c *format.ColumnChunk) error { // Some parquet files are missing the column index on some columns. 
// // An example of this file is testdata/alltypes_tiny_pages_plain.parquet // which was added in https://github.com/apache/parquet-testing/pull/24. if c.ColumnIndexOffset > 0 { offset := c.ColumnIndexOffset - columnIndexOffset length := int64(c.ColumnIndexLength) buffer := columnIndexData[offset : offset+length] if err := thrift.Unmarshal(&f.protocol, buffer, &columnIndexes[(i*numColumns)+j]); err != nil { return fmt.Errorf("decoding column index: rowGroup=%d columnChunk=%d/%d: %w", i, j, numColumns, err) } } return nil }) if err != nil { return nil, nil, err } } if offsetIndexOffset > 0 { offsetIndexData := indexBuffer[:offsetIndexLength] if cast, ok := f.reader.(interface{ SetOffsetIndexSection(offset, length int64) }); ok { cast.SetOffsetIndexSection(offsetIndexOffset, offsetIndexLength) } if _, err := f.reader.ReadAt(offsetIndexData, offsetIndexOffset); err != nil { return nil, nil, fmt.Errorf("reading %d bytes offset index at offset %d: %w", offsetIndexLength, offsetIndexOffset, err) } err := forEachColumnChunk(func(i, j int, c *format.ColumnChunk) error { if c.OffsetIndexOffset > 0 { offset := c.OffsetIndexOffset - offsetIndexOffset length := int64(c.OffsetIndexLength) buffer := offsetIndexData[offset : offset+length] if err := thrift.Unmarshal(&f.protocol, buffer, &offsetIndexes[(i*numColumns)+j]); err != nil { return fmt.Errorf("decoding column index: rowGroup=%d columnChunk=%d/%d: %w", i, j, numColumns, err) } } return nil }) if err != nil { return nil, nil, err } } return columnIndexes, offsetIndexes, nil } // NumRows returns the number of rows in the file. func (f *File) NumRows() int64 { return f.metadata.NumRows } // RowGroups returns the list of row groups in the file. func (f *File) RowGroups() []RowGroup { return f.rowGroups } // Root returns the root column of f. func (f *File) Root() *Column { return f.root } // Schema returns the schema of f. func (f *File) Schema() *Schema { return f.schema } // Metadata returns the metadata of f. 
func (f *File) Metadata() *format.FileMetaData { return &f.metadata }

// Size returns the size of f (in bytes).
func (f *File) Size() int64 { return f.size }

// ReadAt reads bytes into b from f at the given offset.
//
// The method satisfies the io.ReaderAt interface.
func (f *File) ReadAt(b []byte, off int64) (int, error) {
	if off < 0 || off >= f.size {
		return 0, io.EOF
	}

	// Reads extending past the end of the file are truncated; the bytes
	// that could be read are returned along with io.EOF.
	if remain := f.size - off; int64(len(b)) > remain {
		n, err := f.reader.ReadAt(b[:remain], off)
		if err == nil {
			err = io.EOF
		}
		return n, err
	}

	return f.reader.ReadAt(b, off)
}

// ColumnIndexes returns the page index of the parquet file f.
//
// If the file did not contain a column index, the method returns an empty
// slice.
func (f *File) ColumnIndexes() []format.ColumnIndex { return f.columnIndexes }

// OffsetIndexes returns the page index of the parquet file f.
//
// If the file did not contain an offset index, the method returns an empty
// slice.
func (f *File) OffsetIndexes() []format.OffsetIndex { return f.offsetIndexes }

// Lookup returns the value associated with the given key in the file key/value
// metadata.
//
// The ok boolean will be true if the key was found, false otherwise.
func (f *File) Lookup(key string) (value string, ok bool) { return lookupKeyValueMetadata(f.metadata.KeyValueMetadata, key) } func (f *File) hasIndexes() bool { return f.columnIndexes != nil && f.offsetIndexes != nil } var _ io.ReaderAt = (*File)(nil) func sortKeyValueMetadata(keyValueMetadata []format.KeyValue) { sort.Slice(keyValueMetadata, func(i, j int) bool { switch { case keyValueMetadata[i].Key < keyValueMetadata[j].Key: return true case keyValueMetadata[i].Key > keyValueMetadata[j].Key: return false default: return keyValueMetadata[i].Value < keyValueMetadata[j].Value } }) } func lookupKeyValueMetadata(keyValueMetadata []format.KeyValue, key string) (value string, ok bool) { i := sort.Search(len(keyValueMetadata), func(i int) bool { return keyValueMetadata[i].Key >= key }) if i == len(keyValueMetadata) || keyValueMetadata[i].Key != key { return "", false } return keyValueMetadata[i].Value, true } type fileRowGroup struct { schema *Schema rowGroup *format.RowGroup columns []ColumnChunk sorting []SortingColumn config *FileConfig } func (g *fileRowGroup) init(file *File, schema *Schema, columns []*Column, rowGroup *format.RowGroup) { g.schema = schema g.rowGroup = rowGroup g.config = file.config g.columns = make([]ColumnChunk, len(rowGroup.Columns)) g.sorting = make([]SortingColumn, len(rowGroup.SortingColumns)) fileColumnChunks := make([]fileColumnChunk, len(rowGroup.Columns)) for i := range g.columns { fileColumnChunks[i] = fileColumnChunk{ file: file, column: columns[i], rowGroup: rowGroup, chunk: &rowGroup.Columns[i], } if file.hasIndexes() { j := (int(rowGroup.Ordinal) * len(columns)) + i fileColumnChunks[i].columnIndex = &file.columnIndexes[j] fileColumnChunks[i].offsetIndex = &file.offsetIndexes[j] } g.columns[i] = &fileColumnChunks[i] } for i := range g.sorting { g.sorting[i] = &fileSortingColumn{ column: columns[rowGroup.SortingColumns[i].ColumnIdx], descending: rowGroup.SortingColumns[i].Descending, nullsFirst: rowGroup.SortingColumns[i].NullsFirst, 
} } } func (g *fileRowGroup) Schema() *Schema { return g.schema } func (g *fileRowGroup) NumRows() int64 { return g.rowGroup.NumRows } func (g *fileRowGroup) ColumnChunks() []ColumnChunk { return g.columns } func (g *fileRowGroup) SortingColumns() []SortingColumn { return g.sorting } func (g *fileRowGroup) Rows() Rows { return newRowGroupRows(g, g.config.ReadMode) } type fileSortingColumn struct { column *Column descending bool nullsFirst bool } func (s *fileSortingColumn) Path() []string { return s.column.Path() } func (s *fileSortingColumn) Descending() bool { return s.descending } func (s *fileSortingColumn) NullsFirst() bool { return s.nullsFirst } func (s *fileSortingColumn) String() string { b := new(strings.Builder) if s.nullsFirst { b.WriteString("nulls_first+") } if s.descending { b.WriteString("descending(") } else { b.WriteString("ascending(") } b.WriteString(columnPath(s.Path()).String()) b.WriteString(")") return b.String() } type fileColumnChunk struct { file *File column *Column bloomFilter *bloomFilter rowGroup *format.RowGroup columnIndex *format.ColumnIndex offsetIndex *format.OffsetIndex chunk *format.ColumnChunk } func (c *fileColumnChunk) Type() Type { return c.column.Type() } func (c *fileColumnChunk) Column() int { return int(c.column.Index()) } func (c *fileColumnChunk) Pages() Pages { r := new(filePages) r.init(c) return r } func (c *fileColumnChunk) ColumnIndex() ColumnIndex { if c.columnIndex == nil { return nil } return fileColumnIndex{c} } func (c *fileColumnChunk) OffsetIndex() OffsetIndex { if c.offsetIndex == nil { return nil } return (*fileOffsetIndex)(c.offsetIndex) } func (c *fileColumnChunk) BloomFilter() BloomFilter { if c.bloomFilter == nil { return nil } return c.bloomFilter } func (c *fileColumnChunk) NumValues() int64 { return c.chunk.MetaData.NumValues } type filePages struct { chunk *fileColumnChunk rbuf *bufio.Reader rbufpool *sync.Pool section io.SectionReader protocol thrift.CompactProtocol decoder thrift.Decoder 
baseOffset int64 dataOffset int64 dictOffset int64 index int skip int64 dictionary Dictionary bufferSize int } func (f *filePages) init(c *fileColumnChunk) { f.chunk = c f.baseOffset = c.chunk.MetaData.DataPageOffset f.dataOffset = f.baseOffset f.bufferSize = c.file.config.ReadBufferSize if c.chunk.MetaData.DictionaryPageOffset != 0 { f.baseOffset = c.chunk.MetaData.DictionaryPageOffset f.dictOffset = f.baseOffset } f.section = *io.NewSectionReader(c.file, f.baseOffset, c.chunk.MetaData.TotalCompressedSize) f.rbuf, f.rbufpool = getBufioReader(&f.section, f.bufferSize) f.decoder.Reset(f.protocol.NewReader(f.rbuf)) } func (f *filePages) ReadPage() (Page, error) { if f.chunk == nil { return nil, io.EOF } header := getPageHeader() defer putPageHeader(header) for { if err := f.decoder.Decode(header); err != nil { return nil, err } data, err := f.readPage(header, f.rbuf) if err != nil { return nil, err } var page Page switch header.Type { case format.DataPageV2: page, err = f.readDataPageV2(header, data) case format.DataPage: page, err = f.readDataPageV1(header, data) case format.DictionaryPage: // Sometimes parquet files do not have the dictionary page offset // recorded in the column metadata. We account for this by lazily // reading dictionary pages when we encounter them. err = f.readDictionaryPage(header, data) default: err = fmt.Errorf("cannot read values of type %s from page", header.Type) } data.unref() if err != nil { return nil, fmt.Errorf("decoding page %d of column %q: %w", f.index, f.columnPath(), err) } if page == nil { continue } f.index++ if f.skip == 0 { return page, nil } // TODO: what about pages that don't embed the number of rows? // (data page v1 with no offset index in the column chunk). 
numRows := page.NumRows() if numRows <= f.skip { Release(page) } else { tail := page.Slice(f.skip, numRows) Release(page) f.skip = 0 return tail, nil } f.skip -= numRows } } func (f *filePages) readDictionary() error { chunk := io.NewSectionReader(f.chunk.file, f.baseOffset, f.chunk.chunk.MetaData.TotalCompressedSize) rbuf, pool := getBufioReader(chunk, f.bufferSize) defer putBufioReader(rbuf, pool) decoder := thrift.NewDecoder(f.protocol.NewReader(rbuf)) header := getPageHeader() defer putPageHeader(header) if err := decoder.Decode(header); err != nil { return err } page := buffers.get(int(header.CompressedPageSize)) defer page.unref() if _, err := io.ReadFull(rbuf, page.data); err != nil { return err } return f.readDictionaryPage(header, page) } func (f *filePages) readDictionaryPage(header *format.PageHeader, page *buffer) error { if header.DictionaryPageHeader == nil { return ErrMissingPageHeader } d, err := f.chunk.column.decodeDictionary(DictionaryPageHeader{header.DictionaryPageHeader}, page, header.UncompressedPageSize) if err != nil { return err } f.dictionary = d return nil } func (f *filePages) readDataPageV1(header *format.PageHeader, page *buffer) (Page, error) { if header.DataPageHeader == nil { return nil, ErrMissingPageHeader } if isDictionaryFormat(header.DataPageHeader.Encoding) && f.dictionary == nil { if err := f.readDictionary(); err != nil { return nil, err } } return f.chunk.column.decodeDataPageV1(DataPageHeaderV1{header.DataPageHeader}, page, f.dictionary, header.UncompressedPageSize) } func (f *filePages) readDataPageV2(header *format.PageHeader, page *buffer) (Page, error) { if header.DataPageHeaderV2 == nil { return nil, ErrMissingPageHeader } if isDictionaryFormat(header.DataPageHeaderV2.Encoding) && f.dictionary == nil { // If the program seeked to a row passed the first page, the dictionary // page may not have been seen, in which case we have to lazily load it // from the beginning of column chunk. 
if err := f.readDictionary(); err != nil { return nil, err } } return f.chunk.column.decodeDataPageV2(DataPageHeaderV2{header.DataPageHeaderV2}, page, f.dictionary, header.UncompressedPageSize) } func (f *filePages) readPage(header *format.PageHeader, reader *bufio.Reader) (*buffer, error) { page := buffers.get(int(header.CompressedPageSize)) defer page.unref() if _, err := io.ReadFull(reader, page.data); err != nil { return nil, err } if header.CRC != 0 { headerChecksum := uint32(header.CRC) bufferChecksum := crc32.ChecksumIEEE(page.data) if headerChecksum != bufferChecksum { // The parquet specs indicate that corruption errors could be // handled gracefully by skipping pages, tho this may not always // be practical. Depending on how the pages are consumed, // missing rows may cause unpredictable behaviors in algorithms. // // For now, we assume these errors to be fatal, but we may // revisit later and improve error handling to be more resilient // to data corruption. return nil, fmt.Errorf("crc32 checksum mismatch in page of column %q: want=0x%08X got=0x%08X: %w", f.columnPath(), headerChecksum, bufferChecksum, ErrCorrupted, ) } } page.ref() return page, nil } func (f *filePages) SeekToRow(rowIndex int64) (err error) { if f.chunk == nil { return io.ErrClosedPipe } if f.chunk.offsetIndex == nil { _, err = f.section.Seek(f.dataOffset-f.baseOffset, io.SeekStart) f.skip = rowIndex f.index = 0 if f.dictOffset > 0 { f.index = 1 } } else { pages := f.chunk.offsetIndex.PageLocations index := sort.Search(len(pages), func(i int) bool { return pages[i].FirstRowIndex > rowIndex }) - 1 if index < 0 { return ErrSeekOutOfRange } _, err = f.section.Seek(pages[index].Offset-f.baseOffset, io.SeekStart) f.skip = rowIndex - pages[index].FirstRowIndex f.index = index } f.rbuf.Reset(&f.section) return err } func (f *filePages) Close() error { putBufioReader(f.rbuf, f.rbufpool) f.chunk = nil f.section = io.SectionReader{} f.rbuf = nil f.rbufpool = nil f.baseOffset = 0 f.dataOffset = 0 
f.dictOffset = 0 f.index = 0 f.skip = 0 f.dictionary = nil return nil } func (f *filePages) columnPath() columnPath { return columnPath(f.chunk.column.Path()) } type putBufioReaderFunc func() var ( bufioReaderPoolLock sync.Mutex bufioReaderPool = map[int]*sync.Pool{} ) func getBufioReader(r io.Reader, bufferSize int) (*bufio.Reader, *sync.Pool) { pool := getBufioReaderPool(bufferSize) rbuf, _ := pool.Get().(*bufio.Reader) if rbuf == nil { rbuf = bufio.NewReaderSize(r, bufferSize) } else { rbuf.Reset(r) } return rbuf, pool } func putBufioReader(rbuf *bufio.Reader, pool *sync.Pool) { if rbuf != nil && pool != nil { rbuf.Reset(nil) pool.Put(rbuf) } } func getBufioReaderPool(size int) *sync.Pool { bufioReaderPoolLock.Lock() defer bufioReaderPoolLock.Unlock() if pool := bufioReaderPool[size]; pool != nil { return pool } pool := &sync.Pool{} bufioReaderPool[size] = pool return pool } var pageHeaderPool = &sync.Pool{} func getPageHeader() *format.PageHeader { h, _ := pageHeaderPool.Get().(*format.PageHeader) if h != nil { return h } return new(format.PageHeader) } func putPageHeader(h *format.PageHeader) { if h != nil { h.CRC = 0 pageHeaderPool.Put(h) } } ================================================ FILE: file_test.go ================================================ package parquet_test import ( "io" "os" "path/filepath" "strings" "testing" "github.com/segmentio/parquet-go" ) var testdataFiles []string func init() { entries, _ := os.ReadDir("testdata") for _, e := range entries { testdataFiles = append(testdataFiles, filepath.Join("testdata", e.Name())) } } func TestOpenFile(t *testing.T) { for _, path := range testdataFiles { t.Run(path, func(t *testing.T) { f, err := os.Open(path) if err != nil { t.Fatal(err) } defer f.Close() s, err := f.Stat() if err != nil { t.Fatal(err) } p, err := parquet.OpenFile(f, s.Size()) if err != nil { t.Fatal(err) } if size := p.Size(); size != s.Size() { t.Errorf("file size mismatch: want=%d got=%d", s.Size(), size) } root := p.Root() 
b := new(strings.Builder) parquet.PrintSchema(b, root.Name(), root) t.Log(b) printColumns(t, p.Root(), "") }) } } func printColumns(t *testing.T, col *parquet.Column, indent string) { if t.Failed() { return } path := strings.Join(col.Path(), ".") if col.Leaf() { t.Logf("%s%s %v %v", indent, path, col.Encoding(), col.Compression()) } else { t.Logf("%s%s", indent, path) } indent += ". " buffer := make([]parquet.Value, 42) pages := col.Pages() defer pages.Close() for { p, err := pages.ReadPage() if err != nil { if err != io.EOF { t.Error(err) } break } values := p.Values() numValues := int64(0) nullCount := int64(0) for { n, err := values.ReadValues(buffer) for _, v := range buffer[:n] { if v.Column() != col.Index() { t.Errorf("value read from page of column %d says it belongs to column %d", col.Index(), v.Column()) return } if v.IsNull() { nullCount++ } } numValues += int64(n) if err != nil { if err != io.EOF { t.Error(err) return } break } } if numValues != p.NumValues() { t.Errorf("page of column %d declared %d values but %d were read", col.Index(), p.NumValues(), numValues) return } if nullCount != p.NumNulls() { t.Errorf("page of column %d declared %d nulls but %d were read", col.Index(), p.NumNulls(), nullCount) return } parquet.Release(p) } for _, child := range col.Columns() { printColumns(t, child, indent) } } func TestFileKeyValueMetadata(t *testing.T) { type Row struct { Name string } f, err := createParquetFile( makeRows([]Row{{Name: "A"}, {Name: "B"}, {Name: "C"}}), parquet.KeyValueMetadata("hello", "ignore this one"), parquet.KeyValueMetadata("hello", "world"), parquet.KeyValueMetadata("answer", "42"), ) if err != nil { t.Fatal(err) } for _, want := range [][2]string{ {"hello", "world"}, {"answer", "42"}, } { key, value := want[0], want[1] if found, ok := f.Lookup(key); !ok || found != value { t.Errorf("key/value metadata mismatch: want %q=%q but got %q=%q (found=%t)", key, value, key, found, ok) } } } ================================================ 
FILE: filter.go ================================================ package parquet // FilterRowReader constructs a RowReader which exposes rows from reader for // which the predicate has returned true. func FilterRowReader(reader RowReader, predicate func(Row) bool) RowReader { f := &filterRowReader{reader: reader, predicate: predicate} for i := range f.rows { f.rows[i] = f.values[i : i : i+1] } return f } type filterRowReader struct { reader RowReader predicate func(Row) bool rows [defaultRowBufferSize]Row values [defaultRowBufferSize]Value } func (f *filterRowReader) ReadRows(rows []Row) (n int, err error) { for n < len(rows) { r := len(rows) - n if r > len(f.rows) { r = len(f.rows) } r, err = f.reader.ReadRows(f.rows[:r]) for i := 0; i < r; i++ { if f.predicate(f.rows[i]) { rows[n] = append(rows[n][:0], f.rows[i]...) n++ } } if err != nil { break } } return n, err } // FilterRowWriter constructs a RowWriter which writes rows to writer for which // the predicate has returned true. func FilterRowWriter(writer RowWriter, predicate func(Row) bool) RowWriter { return &filterRowWriter{writer: writer, predicate: predicate} } type filterRowWriter struct { writer RowWriter predicate func(Row) bool rows [defaultRowBufferSize]Row } func (f *filterRowWriter) WriteRows(rows []Row) (n int, err error) { defer func() { clear := f.rows[:] for i := range clear { clearValues(clear[i]) } }() for n < len(rows) { i := 0 j := len(rows) - n if j > len(f.rows) { j = len(f.rows) } for _, row := range rows[n : n+j] { if f.predicate(row) { f.rows[i] = row i++ } } if i > 0 { _, err := f.writer.WriteRows(f.rows[:i]) if err != nil { break } } n += j } return n, err } ================================================ FILE: filter_test.go ================================================ package parquet_test import ( "testing" "github.com/segmentio/parquet-go" ) func TestFilterRowReader(t *testing.T) { rows := []parquet.Row{ {parquet.Int64Value(0)}, {parquet.Int64Value(1)}, {parquet.Int64Value(2)}, 
{parquet.Int64Value(3)}, {parquet.Int64Value(4)}, } want := []parquet.Row{ {parquet.Int64Value(0)}, {parquet.Int64Value(2)}, {parquet.Int64Value(4)}, } reader := parquet.FilterRowReader(&bufferedRows{rows: rows}, func(row parquet.Row) bool { return row[0].Int64()%2 == 0 }, ) writer := &bufferedRows{} _, err := parquet.CopyRows(writer, reader) if err != nil { t.Fatal(err) } assertEqualRows(t, want, writer.rows) } func TestFilterRowWriter(t *testing.T) { rows := []parquet.Row{ {parquet.Int64Value(0)}, {parquet.Int64Value(1)}, {parquet.Int64Value(2)}, {parquet.Int64Value(3)}, {parquet.Int64Value(4)}, } want := []parquet.Row{ {parquet.Int64Value(1)}, {parquet.Int64Value(3)}, } buffer := &bufferedRows{} writer := parquet.FilterRowWriter(buffer, func(row parquet.Row) bool { return row[0].Int64()%2 == 1 }, ) reader := &bufferedRows{rows: rows} _, err := parquet.CopyRows(writer, reader) if err != nil { t.Fatal(err) } assertEqualRows(t, want, buffer.rows) } ================================================ FILE: format/parquet.go ================================================ package format import ( "fmt" "github.com/segmentio/parquet-go/deprecated" ) // Types supported by Parquet. These types are intended to be used in combination // with the encodings to control the on disk storage format. For example INT16 // is not included as a type since a good encoding of INT32 would handle this. type Type int32 const ( Boolean Type = 0 Int32 Type = 1 Int64 Type = 2 Int96 Type = 3 // deprecated, only used by legacy implementations. Float Type = 4 Double Type = 5 ByteArray Type = 6 FixedLenByteArray Type = 7 ) func (t Type) String() string { switch t { case Boolean: return "BOOLEAN" case Int32: return "INT32" case Int64: return "INT64" case Int96: return "INT96" case Float: return "FLOAT" case Double: return "DOUBLE" case ByteArray: return "BYTE_ARRAY" case FixedLenByteArray: return "FIXED_LEN_BYTE_ARRAY" default: return "Type(?)" } } // Representation of Schemas. 
// FieldRepetitionType indicates how many times a field may appear in each
// record of the schema it belongs to.
type FieldRepetitionType int32

const (
	// The field is required (can not be null) and each record has exactly 1 value.
	Required FieldRepetitionType = 0
	// The field is optional (can be null) and each record has 0 or 1 values.
	Optional FieldRepetitionType = 1
	// The field is repeated and can contain 0 or more values.
	Repeated FieldRepetitionType = 2
)

// String returns a human-readable representation of the repetition type,
// matching the names used in the parquet thrift definition.
func (t FieldRepetitionType) String() string {
	switch t {
	case Required:
		return "REQUIRED"
	case Optional:
		return "OPTIONAL"
	case Repeated:
		return "REPEATED"
	default:
		return "FieldRepetitionType(?)"
	}
}

// Statistics per row group and per page.
// All fields are optional.
type Statistics struct {
	// DEPRECATED: min and max value of the column. Use min_value and max_value.
	//
	// Values are encoded using PLAIN encoding, except that variable-length byte
	// arrays do not include a length prefix.
	//
	// These fields encode min and max values determined by signed comparison
	// only. New files should use the correct order for a column's logical type
	// and store the values in the min_value and max_value fields.
	//
	// To support older readers, these may be set when the column order is
	// signed.
	Max []byte `thrift:"1"`
	Min []byte `thrift:"2"`
	// Count of null value in the column.
	NullCount int64 `thrift:"3"`
	// Count of distinct values occurring.
	DistinctCount int64 `thrift:"4"`
	// Min and max values for the column, determined by its ColumnOrder.
	//
	// Values are encoded using PLAIN encoding, except that variable-length byte
	// arrays do not include a length prefix.
	MaxValue []byte `thrift:"5"`
	MinValue []byte `thrift:"6"`
}

// Empty structs to use as logical type annotations.
type StringType struct{} // allowed for BINARY, must be encoded with UTF-8
type UUIDType struct{}   // allowed for FIXED[16], must encode raw UUID bytes
type MapType struct{}    // see LogicalTypes.md
type ListType struct{}   // see LogicalTypes.md
type EnumType struct{}   // allowed for BINARY, must be encoded with UTF-8
type DateType struct{}   // allowed for INT32

// The String methods return the logical type names used in the parquet
// specification.
func (*StringType) String() string { return "STRING" }
func (*UUIDType) String() string   { return "UUID" }
func (*MapType) String() string    { return "MAP" }
func (*ListType) String() string   { return "LIST" }
func (*EnumType) String() string   { return "ENUM" }
func (*DateType) String() string   { return "DATE" }

// Logical type to annotate a column that is always null.
//
// Sometimes when discovering the schema of existing data, values are always
// null and the physical type can't be determined. This annotation signals
// the case where the physical type was guessed from all null values.
type NullType struct{}

func (*NullType) String() string { return "NULL" }

// Decimal logical type annotation
//
// To maintain forward-compatibility in v1, implementations using this logical
// type must also set scale and precision on the annotated SchemaElement.
//
// Allowed for physical types: INT32, INT64, FIXED, and BINARY
type DecimalType struct {
	Scale     int32 `thrift:"1,required"`
	Precision int32 `thrift:"2,required"`
}

// String formats the type as DECIMAL(precision,scale).
func (t *DecimalType) String() string {
	// Matching parquet-cli's decimal string format: https://github.com/apache/parquet-mr/blob/d057b39d93014fe40f5067ee4a33621e65c91552/parquet-column/src/test/java/org/apache/parquet/parser/TestParquetParser.java#L249-L265
	return fmt.Sprintf("DECIMAL(%d,%d)", t.Precision, t.Scale)
}

// Time units for logical types.
type MilliSeconds struct{}
type MicroSeconds struct{}
type NanoSeconds struct{}

func (*MilliSeconds) String() string { return "MILLIS" }
func (*MicroSeconds) String() string { return "MICROS" }
func (*NanoSeconds) String() string  { return "NANOS" }

// TimeUnit is a thrift union selecting the resolution of a time or timestamp
// column; at most one of its fields is expected to be non-nil.
type TimeUnit struct { // union
	Millis *MilliSeconds `thrift:"1"`
	Micros *MicroSeconds `thrift:"2"`
	Nanos  *NanoSeconds  `thrift:"3"`
}

// String returns the name of the unit held by the union, or an empty string
// when no unit is set.
func (u *TimeUnit) String() string {
	if u.Millis != nil {
		return u.Millis.String()
	}
	if u.Micros != nil {
		return u.Micros.String()
	}
	if u.Nanos != nil {
		return u.Nanos.String()
	}
	return ""
}

// Timestamp logical type annotation
//
// Allowed for physical types: INT64
type TimestampType struct {
	IsAdjustedToUTC bool     `thrift:"1,required"`
	Unit            TimeUnit `thrift:"2,required"`
}

func (t *TimestampType) String() string {
	return fmt.Sprintf("TIMESTAMP(isAdjustedToUTC=%t,unit=%s)", t.IsAdjustedToUTC, &t.Unit)
}

// Time logical type annotation
//
// Allowed for physical types: INT32 (millis), INT64 (micros, nanos)
type TimeType struct {
	IsAdjustedToUTC bool     `thrift:"1,required"`
	Unit            TimeUnit `thrift:"2,required"`
}

func (t *TimeType) String() string {
	return fmt.Sprintf("TIME(isAdjustedToUTC=%t,unit=%s)", t.IsAdjustedToUTC, &t.Unit)
}

// Integer logical type annotation
//
// bitWidth must be 8, 16, 32, or 64.
//
// Allowed for physical types: INT32, INT64
type IntType struct {
	BitWidth int8 `thrift:"1,required"`
	IsSigned bool `thrift:"2,required"`
}

func (t *IntType) String() string {
	return fmt.Sprintf("INT(%d,%t)", t.BitWidth, t.IsSigned)
}

// Embedded JSON logical type annotation
//
// Allowed for physical types: BINARY
type JsonType struct{}

func (t *JsonType) String() string { return "JSON" }

// Embedded BSON logical type annotation
//
// Allowed for physical types: BINARY
type BsonType struct{}

func (t *BsonType) String() string { return "BSON" }

// LogicalType annotations to replace ConvertedType.
// // To maintain compatibility, implementations using LogicalType for a // SchemaElement must also set the corresponding ConvertedType (if any) // from the following table. type LogicalType struct { // union UTF8 *StringType `thrift:"1"` // use ConvertedType UTF8 Map *MapType `thrift:"2"` // use ConvertedType Map List *ListType `thrift:"3"` // use ConvertedType List Enum *EnumType `thrift:"4"` // use ConvertedType Enum Decimal *DecimalType `thrift:"5"` // use ConvertedType Decimal + SchemaElement.{Scale, Precision} Date *DateType `thrift:"6"` // use ConvertedType Date // use ConvertedType TimeMicros for Time{IsAdjustedToUTC: *, Unit: Micros} // use ConvertedType TimeMillis for Time{IsAdjustedToUTC: *, Unit: Millis} Time *TimeType `thrift:"7"` // use ConvertedType TimestampMicros for Timestamp{IsAdjustedToUTC: *, Unit: Micros} // use ConvertedType TimestampMillis for Timestamp{IsAdjustedToUTC: *, Unit: Millis} Timestamp *TimestampType `thrift:"8"` // 9: reserved for Interval Integer *IntType `thrift:"10"` // use ConvertedType Int* or Uint* Unknown *NullType `thrift:"11"` // no compatible ConvertedType Json *JsonType `thrift:"12"` // use ConvertedType JSON Bson *BsonType `thrift:"13"` // use ConvertedType BSON UUID *UUIDType `thrift:"14"` // no compatible ConvertedType } func (t *LogicalType) String() string { switch { case t.UTF8 != nil: return t.UTF8.String() case t.Map != nil: return t.Map.String() case t.List != nil: return t.List.String() case t.Enum != nil: return t.Enum.String() case t.Decimal != nil: return t.Decimal.String() case t.Date != nil: return t.Date.String() case t.Time != nil: return t.Time.String() case t.Timestamp != nil: return t.Timestamp.String() case t.Integer != nil: return t.Integer.String() case t.Unknown != nil: return t.Unknown.String() case t.Json != nil: return t.Json.String() case t.Bson != nil: return t.Bson.String() case t.UUID != nil: return t.UUID.String() default: return "" } } // Represents a element inside a schema definition. 
//
// - if it is a group (inner node) then type is undefined and num_children is
// defined
//
// - if it is a primitive type (leaf) then type is defined and num_children is
// undefined
//
// The nodes are listed in depth first traversal order.
type SchemaElement struct {
	// Data type for this field. Not set if the current element is a non-leaf node.
	Type *Type `thrift:"1,optional"`

	// If type is FixedLenByteArray, this is the byte length of the values.
	// Otherwise, if specified, this is the maximum bit length to store any of the values.
	// (e.g. a low cardinality INT col could have this set to 3). Note that this is
	// in the schema, and therefore fixed for the entire file.
	TypeLength *int32 `thrift:"2,optional"`

	// repetition of the field. The root of the schema does not have a repetition_type.
	// All other nodes must have one.
	RepetitionType *FieldRepetitionType `thrift:"3,optional"`

	// Name of the field in the schema.
	Name string `thrift:"4,required"`

	// Nested fields. Since thrift does not support nested fields,
	// the nesting is flattened to a single list by a depth-first traversal.
	// The children count is used to construct the nested relationship.
	// This field is not set when the element is a primitive type
	NumChildren int32 `thrift:"5,optional"`

	// DEPRECATED: When the schema is the result of a conversion from another model.
	// Used to record the original type to help with cross conversion.
	//
	// This is superseded by logicalType.
	ConvertedType *deprecated.ConvertedType `thrift:"6,optional"`

	// DEPRECATED: Used when this column contains decimal data.
	// See the DECIMAL converted type for more details.
	//
	// This is superseded by using the DecimalType annotation in logicalType.
	Scale     *int32 `thrift:"7,optional"`
	Precision *int32 `thrift:"8,optional"`

	// When the original schema supports field ids, this will save the
	// original field id in the parquet schema.
	FieldID int32 `thrift:"9,optional"`

	// The logical type of this SchemaElement
	//
	// LogicalType replaces ConvertedType, but ConvertedType is still required
	// for some logical types to ensure forward-compatibility in format v1.
	LogicalType *LogicalType `thrift:"10,optional"`
}

// Encodings supported by Parquet. Not all encodings are valid for all types.
// These enums are also used to specify the encoding of definition and
// repetition levels. See the accompanying doc for the details of the more
// complicated encodings.
type Encoding int32

const (
	// Default encoding.
	// Boolean - 1 bit per value. 0 is false; 1 is true.
	// Int32 - 4 bytes per value. Stored as little-endian.
	// Int64 - 8 bytes per value. Stored as little-endian.
	// Float - 4 bytes per value. IEEE. Stored as little-endian.
	// Double - 8 bytes per value. IEEE. Stored as little-endian.
	// ByteArray - 4 byte length stored as little endian, followed by bytes.
	// FixedLenByteArray - Just the bytes.
	Plain Encoding = 0

	// Group VarInt encoding for Int32/Int64.
	// This encoding is deprecated. It was never used.
	// GroupVarInt Encoding = 1

	// Deprecated: Dictionary encoding. The values in the dictionary are encoded
	// in the plain type.
	// In a data page use RLEDictionary instead.
	// In a Dictionary page use Plain instead.
	PlainDictionary Encoding = 2

	// Group packed run length encoding. Usable for definition/repetition levels
	// encoding and Booleans (on one bit: 0 is false 1 is true.)
	RLE Encoding = 3

	// Bit packed encoding. This can only be used if the data has a known max
	// width. Usable for definition/repetition levels encoding.
	BitPacked Encoding = 4

	// Delta encoding for integers. This can be used for int columns and works best
	// on sorted data.
	DeltaBinaryPacked Encoding = 5

	// Encoding for byte arrays to separate the length values and the data.
	// The lengths are encoded using DeltaBinaryPacked.
	DeltaLengthByteArray Encoding = 6

	// Incremental-encoded byte array.
	// Prefix lengths are encoded using DELTA_BINARY_PACKED.
	// Suffixes are stored as delta length byte arrays.
	DeltaByteArray Encoding = 7

	// Dictionary encoding: the ids are encoded using the RLE encoding
	RLEDictionary Encoding = 8

	// Encoding for floating-point data.
	// K byte-streams are created where K is the size in bytes of the data type.
	// The individual bytes of an FP value are scattered to the corresponding stream and
	// the streams are concatenated.
	// This itself does not reduce the size of the data but can lead to better compression
	// afterwards.
	ByteStreamSplit Encoding = 9
)

// String returns the parquet specification name of the encoding, or
// "Encoding(?)" for values that do not match a known encoding.
func (e Encoding) String() string {
	switch e {
	case Plain:
		return "PLAIN"
	case PlainDictionary:
		return "PLAIN_DICTIONARY"
	case RLE:
		return "RLE"
	case BitPacked:
		return "BIT_PACKED"
	case DeltaBinaryPacked:
		return "DELTA_BINARY_PACKED"
	case DeltaLengthByteArray:
		return "DELTA_LENGTH_BYTE_ARRAY"
	case DeltaByteArray:
		return "DELTA_BYTE_ARRAY"
	case RLEDictionary:
		return "RLE_DICTIONARY"
	case ByteStreamSplit:
		return "BYTE_STREAM_SPLIT"
	default:
		return "Encoding(?)"
	}
}

// Supported compression algorithms.
//
// Codecs added in format version X.Y can be read by readers based on X.Y and later.
// Codec support may vary between readers based on the format version and
// libraries available at runtime.
//
// See Compression.md for a detailed specification of these algorithms.
type CompressionCodec int32 const ( Uncompressed CompressionCodec = 0 Snappy CompressionCodec = 1 Gzip CompressionCodec = 2 LZO CompressionCodec = 3 Brotli CompressionCodec = 4 // Added in 2.4 Lz4 CompressionCodec = 5 // DEPRECATED (Added in 2.4) Zstd CompressionCodec = 6 // Added in 2.4 Lz4Raw CompressionCodec = 7 // Added in 2.9 ) func (c CompressionCodec) String() string { switch c { case Uncompressed: return "UNCOMPRESSED" case Snappy: return "SNAPPY" case Gzip: return "GZIP" case LZO: return "LZO" case Brotli: return "BROTLI" case Lz4: return "LZ4" case Zstd: return "ZSTD" case Lz4Raw: return "LZ4_RAW" default: return "CompressionCodec(?)" } } type PageType int32 const ( DataPage PageType = 0 IndexPage PageType = 1 DictionaryPage PageType = 2 // Version 2 is indicated in the PageHeader and the use of DataPageHeaderV2, // and allows you to read repetition and definition level data without // decompressing the Page. DataPageV2 PageType = 3 ) func (p PageType) String() string { switch p { case DataPage: return "DATA_PAGE" case IndexPage: return "INDEX_PAGE" case DictionaryPage: return "DICTIONARY_PAGE" case DataPageV2: return "DATA_PAGE_V2" default: return "PageType(?)" } } // Enum to annotate whether lists of min/max elements inside ColumnIndex // are ordered and if so, in which direction. type BoundaryOrder int32 const ( Unordered BoundaryOrder = 0 Ascending BoundaryOrder = 1 Descending BoundaryOrder = 2 ) func (b BoundaryOrder) String() string { switch b { case Unordered: return "UNORDERED" case Ascending: return "ASCENDING" case Descending: return "DESCENDING" default: return "BoundaryOrder(?)" } } // Data page header. type DataPageHeader struct { // Number of values, including NULLs, in this data page. NumValues int32 `thrift:"1,required"` // Encoding used for this data page. Encoding Encoding `thrift:"2,required"` // Encoding used for definition levels. DefinitionLevelEncoding Encoding `thrift:"3,required"` // Encoding used for repetition levels. 
RepetitionLevelEncoding Encoding `thrift:"4,required"` // Optional statistics for the data in this page. Statistics Statistics `thrift:"5,optional"` } type IndexPageHeader struct { // TODO } // The dictionary page must be placed at the first position of the column chunk // if it is partly or completely dictionary encoded. At most one dictionary page // can be placed in a column chunk. type DictionaryPageHeader struct { // Number of values in the dictionary. NumValues int32 `thrift:"1,required"` // Encoding using this dictionary page. Encoding Encoding `thrift:"2,required"` // If true, the entries in the dictionary are sorted in ascending order. IsSorted bool `thrift:"3,optional"` } // New page format allowing reading levels without decompressing the data // Repetition and definition levels are uncompressed // The remaining section containing the data is compressed if is_compressed is // true. type DataPageHeaderV2 struct { // Number of values, including NULLs, in this data page. NumValues int32 `thrift:"1,required"` // Number of NULL values, in this data page. // Number of non-null = num_values - num_nulls which is also the number of // values in the data section. NumNulls int32 `thrift:"2,required"` // Number of rows in this data page. which means pages change on record boundaries (r = 0). NumRows int32 `thrift:"3,required"` // Encoding used for data in this page. Encoding Encoding `thrift:"4,required"` // Repetition levels and definition levels are always using RLE (without size in it). // Length of the definition levels. DefinitionLevelsByteLength int32 `thrift:"5,required"` // Length of the repetition levels. RepetitionLevelsByteLength int32 `thrift:"6,required"` // Whether the values are compressed. // Which means the section of the page between // definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) // is compressed with the compression_codec. // If missing it is considered compressed. 
IsCompressed *bool `thrift:"7,optional"` // Optional statistics for the data in this page. Statistics Statistics `thrift:"8,optional"` } // Block-based algorithm type annotation. type SplitBlockAlgorithm struct{} // The algorithm used in Bloom filter. type BloomFilterAlgorithm struct { // union Block *SplitBlockAlgorithm `thrift:"1"` } // Hash strategy type annotation. xxHash is an extremely fast non-cryptographic // hash algorithm. It uses 64 bits version of xxHash. type XxHash struct{} // The hash function used in Bloom filter. This function takes the hash of a // column value using plain encoding. type BloomFilterHash struct { // union XxHash *XxHash `thrift:"1"` } // The compression used in the Bloom filter. type BloomFilterUncompressed struct{} type BloomFilterCompression struct { // union Uncompressed *BloomFilterUncompressed `thrift:"1"` } // Bloom filter header is stored at beginning of Bloom filter data of each column // and followed by its bitset. type BloomFilterHeader struct { // The size of bitset in bytes. NumBytes int32 `thrift:"1,required"` // The algorithm for setting bits. Algorithm BloomFilterAlgorithm `thrift:"2,required"` // The hash function used for Bloom filter. Hash BloomFilterHash `thrift:"3,required"` // The compression used in the Bloom filter. Compression BloomFilterCompression `thrift:"4,required"` } type PageHeader struct { // The type of the page indicates which of the *Header fields below is set. Type PageType `thrift:"1,required"` // Uncompressed page size in bytes (not including this header). UncompressedPageSize int32 `thrift:"2,required"` // Compressed (and potentially encrypted) page size in bytes, not including // this header. CompressedPageSize int32 `thrift:"3,required"` // The 32bit CRC for the page, to be be calculated as follows: // - Using the standard CRC32 algorithm // - On the data only, i.e. this header should not be included. 
'Data' // hereby refers to the concatenation of the repetition levels, the // definition levels and the column value, in this exact order. // - On the encoded versions of the repetition levels, definition levels and // column values. // - On the compressed versions of the repetition levels, definition levels // and column values where possible; // - For v1 data pages, the repetition levels, definition levels and column // values are always compressed together. If a compression scheme is // specified, the CRC shall be calculated on the compressed version of // this concatenation. If no compression scheme is specified, the CRC // shall be calculated on the uncompressed version of this concatenation. // - For v2 data pages, the repetition levels and definition levels are // handled separately from the data and are never compressed (only // encoded). If a compression scheme is specified, the CRC shall be // calculated on the concatenation of the uncompressed repetition levels, // uncompressed definition levels and the compressed column values. // If no compression scheme is specified, the CRC shall be calculated on // the uncompressed concatenation. // - In encrypted columns, CRC is calculated after page encryption; the // encryption itself is performed after page compression (if compressed) // If enabled, this allows for disabling checksumming in HDFS if only a few // pages need to be read. CRC int32 `thrift:"4,optional"` // Headers for page specific data. One only will be set. DataPageHeader *DataPageHeader `thrift:"5,optional"` IndexPageHeader *IndexPageHeader `thrift:"6,optional"` DictionaryPageHeader *DictionaryPageHeader `thrift:"7,optional"` DataPageHeaderV2 *DataPageHeaderV2 `thrift:"8,optional"` } // Wrapper struct to store key values. type KeyValue struct { Key string `thrift:"1,required"` Value string `thrift:"2,required"` } // Wrapper struct to specify sort order. 
type SortingColumn struct { // The column index (in this row group) ColumnIdx int32 `thrift:"1,required"` // If true, indicates this column is sorted in descending order. Descending bool `thrift:"2,required"` // If true, nulls will come before non-null values, otherwise, // nulls go at the end. NullsFirst bool `thrift:"3,required"` } // Statistics of a given page type and encoding. type PageEncodingStats struct { // The page type (data/dic/...). PageType PageType `thrift:"1,required"` // Encoding of the page. Encoding Encoding `thrift:"2,required"` // Number of pages of this type with this encoding. Count int32 `thrift:"3,required"` } // Description for column metadata. type ColumnMetaData struct { // Type of this column. Type Type `thrift:"1,required"` // Set of all encodings used for this column. The purpose is to validate // whether we can decode those pages. Encoding []Encoding `thrift:"2,required"` // Path in schema. PathInSchema []string `thrift:"3,required"` // Compression codec. Codec CompressionCodec `thrift:"4,required"` // Number of values in this column. NumValues int64 `thrift:"5,required"` // Total byte size of all uncompressed pages in this column chunk (including the headers). TotalUncompressedSize int64 `thrift:"6,required"` // Total byte size of all compressed, and potentially encrypted, pages // in this column chunk (including the headers). TotalCompressedSize int64 `thrift:"7,required"` // Optional key/value metadata. KeyValueMetadata []KeyValue `thrift:"8,optional"` // Byte offset from beginning of file to first data page. DataPageOffset int64 `thrift:"9,required"` // Byte offset from beginning of file to root index page. IndexPageOffset int64 `thrift:"10,optional"` // Byte offset from the beginning of file to first (only) dictionary page. DictionaryPageOffset int64 `thrift:"11,optional"` // optional statistics for this column chunk. Statistics Statistics `thrift:"12,optional"` // Set of all encodings used for pages in this column chunk. 
// This information can be used to determine if all data pages are // dictionary encoded for example. EncodingStats []PageEncodingStats `thrift:"13,optional"` // Byte offset from beginning of file to Bloom filter data. BloomFilterOffset int64 `thrift:"14,optional"` } type EncryptionWithFooterKey struct{} type EncryptionWithColumnKey struct { // Column path in schema. PathInSchema []string `thrift:"1,required"` // Retrieval metadata of column encryption key. KeyMetadata []byte `thrift:"2,optional"` } type ColumnCryptoMetaData struct { EncryptionWithFooterKey *EncryptionWithFooterKey `thrift:"1"` EncryptionWithColumnKey *EncryptionWithColumnKey `thrift:"2"` } type ColumnChunk struct { // File where column data is stored. If not set, assumed to be same file as // metadata. This path is relative to the current file. FilePath string `thrift:"1,optional"` // Byte offset in file_path to the ColumnMetaData. FileOffset int64 `thrift:"2,required"` // Column metadata for this chunk. This is the same content as what is at // file_path/file_offset. Having it here has it replicated in the file // metadata. MetaData ColumnMetaData `thrift:"3,optional"` // File offset of ColumnChunk's OffsetIndex. OffsetIndexOffset int64 `thrift:"4,optional"` // Size of ColumnChunk's OffsetIndex, in bytes. OffsetIndexLength int32 `thrift:"5,optional"` // File offset of ColumnChunk's ColumnIndex. ColumnIndexOffset int64 `thrift:"6,optional"` // Size of ColumnChunk's ColumnIndex, in bytes. ColumnIndexLength int32 `thrift:"7,optional"` // Crypto metadata of encrypted columns. CryptoMetadata ColumnCryptoMetaData `thrift:"8,optional"` // Encrypted column metadata for this chunk. EncryptedColumnMetadata []byte `thrift:"9,optional"` } type RowGroup struct { // Metadata for each column chunk in this row group. // This list must have the same order as the SchemaElement list in FileMetaData. Columns []ColumnChunk `thrift:"1,required"` // Total byte size of all the uncompressed column data in this row group. 
TotalByteSize int64 `thrift:"2,required"` // Number of rows in this row group. NumRows int64 `thrift:"3,required"` // If set, specifies a sort ordering of the rows in this RowGroup. // The sorting columns can be a subset of all the columns. SortingColumns []SortingColumn `thrift:"4,optional"` // Byte offset from beginning of file to first page (data or dictionary) // in this row group FileOffset int64 `thrift:"5,optional"` // Total byte size of all compressed (and potentially encrypted) column data // in this row group. TotalCompressedSize int64 `thrift:"6,optional"` // Row group ordinal in the file. Ordinal int16 `thrift:"7,optional"` } // Empty struct to signal the order defined by the physical or logical type. type TypeDefinedOrder struct{} // Union to specify the order used for the min_value and max_value fields for a // column. This union takes the role of an enhanced enum that allows rich // elements (which will be needed for a collation-based ordering in the future). // // Possible values are: // // TypeDefinedOrder - the column uses the order defined by its logical or // physical type (if there is no logical type). // // If the reader does not support the value of this union, min and max stats // for this column should be ignored. 
type ColumnOrder struct { // union // The sort orders for logical types are: // UTF8 - unsigned byte-wise comparison // INT8 - signed comparison // INT16 - signed comparison // INT32 - signed comparison // INT64 - signed comparison // UINT8 - unsigned comparison // UINT16 - unsigned comparison // UINT32 - unsigned comparison // UINT64 - unsigned comparison // DECIMAL - signed comparison of the represented value // DATE - signed comparison // TIME_MILLIS - signed comparison // TIME_MICROS - signed comparison // TIMESTAMP_MILLIS - signed comparison // TIMESTAMP_MICROS - signed comparison // INTERVAL - unsigned comparison // JSON - unsigned byte-wise comparison // BSON - unsigned byte-wise comparison // ENUM - unsigned byte-wise comparison // LIST - undefined // MAP - undefined // // In the absence of logical types, the sort order is determined by the physical type: // BOOLEAN - false, true // INT32 - signed comparison // INT64 - signed comparison // INT96 (only used for legacy timestamps) - undefined // FLOAT - signed comparison of the represented value (*) // DOUBLE - signed comparison of the represented value (*) // BYTE_ARRAY - unsigned byte-wise comparison // FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison // // (*) Because the sorting order is not specified properly for floating // point values (relations vs. total ordering) the following // compatibility rules should be applied when reading statistics: // - If the min is a NaN, it should be ignored. // - If the max is a NaN, it should be ignored. // - If the min is +0, the row group may contain -0 values as well. // - If the max is -0, the row group may contain +0 values as well. // - When looking for NaN values, min and max should be ignored. TypeOrder *TypeDefinedOrder `thrift:"1"` } type PageLocation struct { // Offset of the page in the file. Offset int64 `thrift:"1,required"` // Size of the page, including header. Sum of compressed_page_size and // header length. 
CompressedPageSize int32 `thrift:"2,required"` // Index within the RowGroup of the first row of the page; this means // pages change on record boundaries (r = 0). FirstRowIndex int64 `thrift:"3,required"` } type OffsetIndex struct { // PageLocations, ordered by increasing PageLocation.offset. It is required // that page_locations[i].first_row_index < page_locations[i+1].first_row_index. PageLocations []PageLocation `thrift:"1,required"` } // Description for ColumnIndex. // Each [i] refers to the page at OffsetIndex.PageLocations[i] type ColumnIndex struct { // A list of Boolean values to determine the validity of the corresponding // min and max values. If true, a page contains only null values, and writers // have to set the corresponding entries in min_values and max_values to // byte[0], so that all lists have the same length. If false, the // corresponding entries in min_values and max_values must be valid. NullPages []bool `thrift:"1,required"` // Two lists containing lower and upper bounds for the values of each page // determined by the ColumnOrder of the column. These may be the actual // minimum and maximum values found on a page, but can also be (more compact) // values that do not exist on a page. For example, instead of storing ""Blart // Versenwald III", a writer may set min_values[i]="B", max_values[i]="C". // Such more compact values must still be valid values within the column's // logical type. Readers must make sure that list entries are populated before // using them by inspecting null_pages. MinValues [][]byte `thrift:"2,required"` MaxValues [][]byte `thrift:"3,required"` // Stores whether both min_values and max_values are ordered and if so, in // which direction. This allows readers to perform binary searches in both // lists. Readers cannot assume that max_values[i] <= min_values[i+1], even // if the lists are ordered. BoundaryOrder BoundaryOrder `thrift:"4,required"` // A list containing the number of null values for each page. 
NullCounts []int64 `thrift:"5,optional"` } type AesGcmV1 struct { // AAD prefix. AadPrefix []byte `thrift:"1,optional"` // Unique file identifier part of AAD suffix. AadFileUnique []byte `thrift:"2,optional"` // In files encrypted with AAD prefix without storing it, // readers must supply the prefix. SupplyAadPrefix bool `thrift:"3,optional"` } type AesGcmCtrV1 struct { // AAD prefix. AadPrefix []byte `thrift:"1,optional"` // Unique file identifier part of AAD suffix. AadFileUnique []byte `thrift:"2,optional"` // In files encrypted with AAD prefix without storing it, // readers must supply the prefix. SupplyAadPrefix bool `thrift:"3,optional"` } type EncryptionAlgorithm struct { // union AesGcmV1 *AesGcmV1 `thrift:"1"` AesGcmCtrV1 *AesGcmCtrV1 `thrift:"2"` } // Description for file metadata. type FileMetaData struct { // Version of this file. Version int32 `thrift:"1,required"` // Parquet schema for this file. This schema contains metadata for all the columns. // The schema is represented as a tree with a single root. The nodes of the tree // are flattened to a list by doing a depth-first traversal. // The column metadata contains the path in the schema for that column which can be // used to map columns to nodes in the schema. // The first element is the root. Schema []SchemaElement `thrift:"2,required"` // Number of rows in this file. NumRows int64 `thrift:"3,required"` // Row groups in this file. RowGroups []RowGroup `thrift:"4,required"` // Optional key/value metadata. KeyValueMetadata []KeyValue `thrift:"5,optional"` // String for application that wrote this file. This should be in the format // version (build ). // e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) CreatedBy string `thrift:"6,optional"` // Sort order used for the min_value and max_value fields in the Statistics // objects and the min_values and max_values fields in the ColumnIndex // objects of each column in this file. 
Sort orders are listed in the order // matching the columns in the schema. The indexes are not necessary the same // though, because only leaf nodes of the schema are represented in the list // of sort orders. // // Without column_orders, the meaning of the min_value and max_value fields // in the Statistics object and the ColumnIndex object is undefined. To ensure // well-defined behavior, if these fields are written to a Parquet file, // column_orders must be written as well. // // The obsolete min and max fields in the Statistics object are always sorted // by signed comparison regardless of column_orders. ColumnOrders []ColumnOrder `thrift:"7,optional"` // Encryption algorithm. This field is set only in encrypted files // with plaintext footer. Files with encrypted footer store algorithm id // in FileCryptoMetaData structure. EncryptionAlgorithm EncryptionAlgorithm `thrift:"8,optional"` // Retrieval metadata of key used for signing the footer. // Used only in encrypted files with plaintext footer. FooterSigningKeyMetadata []byte `thrift:"9,optional"` } // Crypto metadata for files with encrypted footer. type FileCryptoMetaData struct { // Encryption algorithm. This field is only used for files // with encrypted footer. Files with plaintext footer store algorithm id // inside footer (FileMetaData structure). EncryptionAlgorithm EncryptionAlgorithm `thrift:"1,required"` // Retrieval metadata of key used for encryption of footer, // and (possibly) columns. 
KeyMetadata []byte `thrift:"2,optional"` } ================================================ FILE: format/parquet_test.go ================================================ package format_test import ( "reflect" "testing" "github.com/segmentio/encoding/thrift" "github.com/segmentio/parquet-go/format" ) func TestMarshalUnmarshalSchemaMetadata(t *testing.T) { protocol := &thrift.CompactProtocol{} metadata := &format.FileMetaData{ Version: 1, Schema: []format.SchemaElement{ { Name: "hello", }, }, RowGroups: []format.RowGroup{}, } b, err := thrift.Marshal(protocol, metadata) if err != nil { t.Fatal(err) } decoded := &format.FileMetaData{} if err := thrift.Unmarshal(protocol, b, &decoded); err != nil { t.Fatal(err) } if !reflect.DeepEqual(metadata, decoded) { t.Error("values mismatch:") t.Logf("expected:\n%#v", metadata) t.Logf("found:\n%#v", decoded) } } ================================================ FILE: go.mod ================================================ module github.com/segmentio/parquet-go go 1.19 require ( github.com/andybalholm/brotli v1.0.3 github.com/google/uuid v1.3.0 github.com/hexops/gotextdiff v1.0.3 github.com/klauspost/compress v1.15.9 github.com/olekukonko/tablewriter v0.0.5 github.com/pierrec/lz4/v4 v4.1.9 github.com/segmentio/encoding v0.3.5 golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 google.golang.org/protobuf v1.30.0 ) require github.com/mattn/go-runewidth v0.0.9 // indirect ================================================ FILE: go.sum ================================================ github.com/andybalholm/brotli v1.0.3 h1:fpcw+r1N1h0Poc1F/pHbW40cUm/lMEQslZtCkBQ0UnM= github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/uuid v1.3.0 
h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/pierrec/lz4/v4 v4.1.9 h1:xkrjwpOP5xg1k4Nn4GX4a4YFGhscyQL/3EddJ1Xxqm8= github.com/pierrec/lz4/v4 v4.1.9/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= github.com/segmentio/encoding v0.3.5 h1:UZEiaZ55nlXGDL92scoVuw00RmiRCazIEmvPSbSvt8Y= github.com/segmentio/encoding v0.3.5/go.mod h1:n0JeuIqEQrQoPDGsjo8UNd1iA0U8d8+oHAA4E3G3OxM= golang.org/x/sys v0.0.0-20211110154304-99a53858aa08 h1:WecRHqgE09JBkh/584XIE6PMz5KKE/vER4izNUi30AQ= golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 
================================================ FILE: hashprobe/aeshash/aeshash.go ================================================ // Package aeshash implements hashing functions derived from the Go runtime's // internal hashing based on the support of AES encryption in CPU instructions. // // On architecture where the CPU does not provide instructions for AES // encryption, the aeshash.Enabled function always returns false, and attempting // to call any other function will trigger a panic. package aeshash import "github.com/segmentio/parquet-go/sparse" func MultiHash32(hashes []uintptr, values []uint32, seed uintptr) { MultiHashUint32Array(hashes, sparse.MakeUint32Array(values), seed) } func MultiHash64(hashes []uintptr, values []uint64, seed uintptr) { MultiHashUint64Array(hashes, sparse.MakeUint64Array(values), seed) } func MultiHash128(hashes []uintptr, values [][16]byte, seed uintptr) { MultiHashUint128Array(hashes, sparse.MakeUint128Array(values), seed) } ================================================ FILE: hashprobe/aeshash/aeshash_amd64.go ================================================ //go:build !purego package aeshash import ( "github.com/segmentio/parquet-go/sparse" "golang.org/x/sys/cpu" ) // Enabled returns true if AES hash is available on the system. // // The function uses the same logic than the Go runtime since we depend on // the AES hash state being initialized. 
// // See https://go.dev/src/runtime/alg.go func Enabled() bool { return cpu.X86.HasAES && cpu.X86.HasSSSE3 && cpu.X86.HasSSE41 } //go:noescape func Hash32(value uint32, seed uintptr) uintptr //go:noescape func Hash64(value uint64, seed uintptr) uintptr //go:noescape func Hash128(value [16]byte, seed uintptr) uintptr //go:noescape func MultiHashUint32Array(hashes []uintptr, values sparse.Uint32Array, seed uintptr) //go:noescape func MultiHashUint64Array(hashes []uintptr, values sparse.Uint64Array, seed uintptr) //go:noescape func MultiHashUint128Array(hashes []uintptr, values sparse.Uint128Array, seed uintptr) ================================================ FILE: hashprobe/aeshash/aeshash_amd64.s ================================================ //go:build !purego #include "textflag.h" // func Hash32(value uint32, seed uintptr) uintptr TEXT ·Hash32(SB), NOSPLIT, $0-24 MOVL value+0(FP), AX MOVQ seed+8(FP), BX MOVOU runtime·aeskeysched+0(SB), X1 MOVOU runtime·aeskeysched+16(SB), X2 MOVOU runtime·aeskeysched+32(SB), X3 MOVQ BX, X0 PINSRD $2, AX, X0 AESENC X1, X0 AESENC X2, X0 AESENC X3, X0 MOVQ X0, ret+16(FP) RET // func Hash64(value uint64, seed uintptr) uintptr TEXT ·Hash64(SB), NOSPLIT, $0-24 MOVQ value+0(FP), AX MOVQ seed+8(FP), BX MOVOU runtime·aeskeysched+0(SB), X1 MOVOU runtime·aeskeysched+16(SB), X2 MOVOU runtime·aeskeysched+32(SB), X3 MOVQ BX, X0 PINSRQ $1, AX, X0 AESENC X1, X0 AESENC X2, X0 AESENC X3, X0 MOVQ X0, ret+16(FP) RET // func Hash128(value [16]byte, seed uintptr) uintptr TEXT ·Hash128(SB), NOSPLIT, $0-32 LEAQ value+0(FP), AX MOVQ seed+16(FP), BX MOVQ $16, CX MOVQ BX, X0 // 64 bits of per-table hash seed PINSRW $4, CX, X0 // 16 bits of length PSHUFHW $0, X0, X0 // repeat length 4 times total PXOR runtime·aeskeysched(SB), X0 // xor in per-process seed AESENC X0, X0 // scramble seed MOVOU (AX), X1 PXOR X0, X1 AESENC X1, X1 AESENC X1, X1 AESENC X1, X1 MOVQ X1, ret+24(FP) RET // func MultiHashUint32Array(hashes []uintptr, values sparse.Uint32Array, seed 
uintptr) TEXT ·MultiHashUint32Array(SB), NOSPLIT, $0-56 MOVQ hashes_base+0(FP), AX MOVQ values_array_ptr+24(FP), BX MOVQ values_array_len+32(FP), CX MOVQ values_array_off+40(FP), R8 MOVQ seed+48(FP), DX MOVOU runtime·aeskeysched+0(SB), X1 MOVOU runtime·aeskeysched+16(SB), X2 MOVOU runtime·aeskeysched+32(SB), X3 XORQ SI, SI JMP test loop: MOVQ DX, X0 PINSRD $2, (BX), X0 AESENC X1, X0 AESENC X2, X0 AESENC X3, X0 MOVQ X0, (AX)(SI*8) INCQ SI ADDQ R8, BX test: CMPQ SI, CX JNE loop RET // func MultiHashUint64Array(hashes []uintptr, values sparse.Uint64Array, seed uintptr) TEXT ·MultiHashUint64Array(SB), NOSPLIT, $0-56 MOVQ hashes_base+0(FP), AX MOVQ values_array_ptr+24(FP), BX MOVQ values_array_len+32(FP), CX MOVQ values_array_off+40(FP), R8 MOVQ seed+48(FP), DX MOVOU runtime·aeskeysched+0(SB), X1 MOVOU runtime·aeskeysched+16(SB), X2 MOVOU runtime·aeskeysched+32(SB), X3 XORQ SI, SI JMP test loop: MOVQ DX, X0 PINSRQ $1, (BX), X0 AESENC X1, X0 AESENC X2, X0 AESENC X3, X0 MOVQ X0, (AX)(SI*8) INCQ SI ADDQ R8, BX test: CMPQ SI, CX JNE loop RET // func MultiHashUint128Array(hashes []uintptr, values sparse.Uint128Array, seed uintptr) TEXT ·MultiHashUint128Array(SB), NOSPLIT, $0-56 MOVQ hashes_base+0(FP), AX MOVQ values_array_ptr+24(FP), BX MOVQ values_array_len+32(FP), CX MOVQ values_array_off+40(FP), R8 MOVQ seed+48(FP), DX MOVQ $16, DI MOVQ DX, X0 PINSRW $4, DI, X0 PSHUFHW $0, X0, X0 PXOR runtime·aeskeysched(SB), X0 AESENC X0, X0 XORQ SI, SI JMP test loop: MOVOU (BX), X1 PXOR X0, X1 AESENC X1, X1 AESENC X1, X1 AESENC X1, X1 MOVQ X1, (AX)(SI*8) INCQ SI ADDQ R8, BX test: CMPQ SI, CX JNE loop RET ================================================ FILE: hashprobe/aeshash/aeshash_purego.go ================================================ //go:build purego || !amd64 package aeshash import "github.com/segmentio/parquet-go/sparse" // Enabled always returns false since we assume that AES instructions are not // available by default. 
func Enabled() bool { return false } const unsupported = "BUG: AES hash is not available on this platform" func Hash32(value uint32, seed uintptr) uintptr { panic(unsupported) } func Hash64(value uint64, seed uintptr) uintptr { panic(unsupported) } func Hash128(value [16]byte, seed uintptr) uintptr { panic(unsupported) } func MultiHashUint32Array(hashes []uintptr, values sparse.Uint32Array, seed uintptr) { panic(unsupported) } func MultiHashUint64Array(hashes []uintptr, values sparse.Uint64Array, seed uintptr) { panic(unsupported) } func MultiHashUint128Array(hashes []uintptr, values sparse.Uint128Array, seed uintptr) { panic(unsupported) } ================================================ FILE: hashprobe/aeshash/aeshash_test.go ================================================ package aeshash import ( "encoding/binary" "testing" "time" "unsafe" ) //go:noescape //go:linkname runtime_memhash32 runtime.memhash32 func runtime_memhash32(data unsafe.Pointer, seed uintptr) uintptr //go:noescape //go:linkname runtime_memhash64 runtime.memhash64 func runtime_memhash64(data unsafe.Pointer, seed uintptr) uintptr //go:noescape //go:linkname runtime_memhash runtime.memhash func runtime_memhash(data unsafe.Pointer, seed, size uintptr) uintptr func memhash32(data uint32, seed uintptr) uintptr { return runtime_memhash32(unsafe.Pointer(&data), seed) } func memhash64(data uint64, seed uintptr) uintptr { return runtime_memhash64(unsafe.Pointer(&data), seed) } func memhash128(data [16]byte, seed uintptr) uintptr { return runtime_memhash(unsafe.Pointer(&data), seed, 16) } func TestHash32(t *testing.T) { if !Enabled() { t.Skip("AES hash not supported on this platform") } h0 := memhash32(42, 1) h1 := Hash32(42, 1) if h0 != h1 { t.Errorf("want=%016x got=%016x", h0, h1) } } func TestMultiHash32(t *testing.T) { if !Enabled() { t.Skip("AES hash not supported on this platform") } const N = 10 hashes := [N]uintptr{} values := [N]uint32{} seed := uintptr(32) for i := range values { values[i] = 
uint32(i) } MultiHash32(hashes[:], values[:], seed) for i := range values { h := Hash32(values[i], seed) if h != hashes[i] { t.Errorf("hash(%d): want=%016x got=%016x", values[i], h, hashes[i]) } } } func TestHash64(t *testing.T) { if !Enabled() { t.Skip("AES hash not supported on this platform") } h0 := memhash64(42, 1) h1 := Hash64(42, 1) if h0 != h1 { t.Errorf("want=%016x got=%016x", h0, h1) } } func TestMultiHash64(t *testing.T) { if !Enabled() { t.Skip("AES hash not supported on this platform") } const N = 10 hashes := [N]uintptr{} values := [N]uint64{} seed := uintptr(64) for i := range values { values[i] = uint64(i) } MultiHash64(hashes[:], values[:], seed) for i := range values { h := Hash64(values[i], seed) if h != hashes[i] { t.Errorf("hash(%d): want=%016x got=%016x", values[i], h, hashes[i]) } } } func BenchmarkMultiHash64(b *testing.B) { if !Enabled() { b.Skip("AES hash not supported on this platform") } hashes := [512]uintptr{} values := [512]uint64{} b.SetBytes(8 * int64(len(hashes))) benchmarkHashThroughput(b, func(seed uintptr) int { MultiHash64(hashes[:], values[:], seed) return len(hashes) }) } func TestHash128(t *testing.T) { if !Enabled() { t.Skip("AES hash not supported on this platform") } h0 := memhash128([16]byte{0: 42}, 1) h1 := Hash128([16]byte{0: 42}, 1) if h0 != h1 { t.Errorf("want=%016x got=%016x", h0, h1) } } func TestMultiHash128(t *testing.T) { if !Enabled() { t.Skip("AES hash not supported on this platform") } const N = 10 hashes := [N]uintptr{} values := [N][16]byte{} seed := uintptr(128) for i := range values { binary.LittleEndian.PutUint64(values[i][:8], uint64(i)) } MultiHash128(hashes[:], values[:], seed) for i := range values { h := Hash128(values[i], seed) if h != hashes[i] { t.Errorf("hash(%d): want=%016x got=%016x", values[i], h, hashes[i]) } } } func benchmarkHashThroughput(b *testing.B, f func(seed uintptr) int) { hashes := int64(0) start := time.Now() for i := 0; i < b.N; i++ { hashes += int64(f(uintptr(i))) } seconds := 
time.Since(start).Seconds() b.ReportMetric(float64(hashes)/seconds, "hash/s") } ================================================ FILE: hashprobe/hashprobe.go ================================================ // Package hashprobe provides implementations of probing tables for various // data types. // // Probing tables are specialized hash tables supporting only a single // "probing" operation which behave like a "lookup or insert". When a key // is probed, either its value is retrieved if it already existed in the table, // or it is inserted and assigned its index in the insert sequence as value. // // Values are represented as signed 32 bits integers, which means that probing // tables defined in this package may contain at most 2^31-1 entries. // // Probing tables have a method named Probe with the following signature: // // func (t *Int64Table) Probe(keys []int64, values []int32) int { // ... // } // // The method takes an array of keys to probe as first argument, an array of // values where the indexes of each key will be written as second argument, and // returns the number of keys that were inserted during the call. // // Applications that need to determine which keys were inserted can capture the // length of the probing table prior to the call, and scan the list of values // looking for indexes greater or equal to the length of the table before the // call. package hashprobe import ( cryptoRand "crypto/rand" "encoding/binary" "math" "math/bits" "math/rand" "sync" "github.com/segmentio/parquet-go/hashprobe/aeshash" "github.com/segmentio/parquet-go/hashprobe/wyhash" "github.com/segmentio/parquet-go/internal/unsafecast" "github.com/segmentio/parquet-go/sparse" ) const ( // Number of probes tested per iteration. This parameter balances between // the amount of memory allocated on the stack to hold the computed hashes // of the keys being probed, and amortizing the baseline cost of the probing // algorithm. 
	// The larger the value, the more memory is required, but lower the baseline
	// cost will be.
	//
	// We chose a value that is somewhat large, resulting in reserving 2KiB of
	// stack but mostly erasing the baseline cost.
	probesPerLoop = 256
)

var (
	// Seed material read from the system's cryptographic source at program
	// startup; used once to seed the package-level PRNG below.
	prngSeed [8]byte
	// prngMutex guards prngSource, which is not safe for concurrent use.
	prngMutex  sync.Mutex
	prngSource rand.Source64
)

// init seeds the package-level pseudo-random source from the system's
// cryptographic random number generator. The PRNG is later used to pick
// per-table hash seeds (see randSeed), which makes hash collisions
// unpredictable across table instances.
func init() {
	_, err := cryptoRand.Read(prngSeed[:])
	if err != nil {
		panic("cannot seed random number generator from system source: " + err.Error())
	}
	seed := int64(binary.LittleEndian.Uint64(prngSeed[:]))
	prngSource = rand.NewSource(seed).(rand.Source64)
}

// tableSizeAndMaxLen computes the number of groups (size, always a power of
// two) required to store numValues entries in groups of groupSize slots while
// staying under the maxLoad load factor, and the number of entries (maxLen)
// the table may hold before it needs to grow.
func tableSizeAndMaxLen(groupSize, numValues int, maxLoad float64) (size, maxLen int) {
	n := int(math.Ceil((1 / maxLoad) * float64(numValues)))
	size = nextPowerOf2((n + (groupSize - 1)) / groupSize)
	maxLen = int(math.Ceil(maxLoad * float64(groupSize*size)))
	return
}

// nextPowerOf2 returns the smallest power of two greater than or equal to n.
// NOTE(review): assumes n >= 1; for n == 0 the shift overflows to 0.
func nextPowerOf2(n int) int {
	return 1 << (64 - bits.LeadingZeros64(uint64(n-1)))
}

// randSeed returns a new pseudo-random seed, serializing access to the shared
// PRNG source.
func randSeed() uintptr {
	prngMutex.Lock()
	defer prngMutex.Unlock()
	return uintptr(prngSource.Uint64())
}

// Int32Table is a probing table for int32 keys.
type Int32Table struct{ table32 }

// NewInt32Table constructs a probing table with the given initial capacity and
// maximum load factor.
func NewInt32Table(cap int, maxLoad float64) *Int32Table {
	return &Int32Table{makeTable32(cap, maxLoad)}
}

func (t *Int32Table) Reset() { t.reset() }

func (t *Int32Table) Len() int { return t.len }

func (t *Int32Table) Cap() int { return t.size() }

func (t *Int32Table) Probe(keys, values []int32) int {
	return t.probe(unsafecast.Int32ToUint32(keys), values)
}

func (t *Int32Table) ProbeArray(keys sparse.Int32Array, values []int32) int {
	return t.probeArray(keys.Uint32Array(), values)
}

// Float32Table is a probing table for float32 keys, which are probed by their
// 32 bit representation.
type Float32Table struct{ table32 }

// NewFloat32Table constructs a probing table with the given initial capacity
// and maximum load factor.
func NewFloat32Table(cap int, maxLoad float64) *Float32Table {
	return &Float32Table{makeTable32(cap, maxLoad)}
}

func (t *Float32Table) Reset() { t.reset() }

func (t *Float32Table) Len() int { return t.len }

func (t *Float32Table) Cap() int { return t.size() }

func (t *Float32Table) Probe(keys []float32, values []int32) int {
	return t.probe(unsafecast.Float32ToUint32(keys), values)
}

func (t *Float32Table) ProbeArray(keys sparse.Float32Array, values []int32) int {
	return t.probeArray(keys.Uint32Array(), values)
}

// Uint32Table is a probing table for uint32 keys.
type Uint32Table struct{ table32 }

// NewUint32Table constructs a probing table with the given initial capacity
// and maximum load factor.
func NewUint32Table(cap int, maxLoad float64) *Uint32Table {
	return &Uint32Table{makeTable32(cap, maxLoad)}
}

func (t *Uint32Table) Reset() { t.reset() }

func (t *Uint32Table) Len() int { return t.len }

func (t *Uint32Table) Cap() int { return t.size() }

func (t *Uint32Table) Probe(keys []uint32, values []int32) int {
	return t.probe(keys, values)
}

func (t *Uint32Table) ProbeArray(keys sparse.Uint32Array, values []int32) int {
	return t.probeArray(keys, values)
}

// table32 is the generic implementation of probing tables for 32 bit types.
//
// The table uses the following memory layout:
//
//	[group 0][group 1][...][group N]
//
// Each group contains up to 7 key/value pairs, and is exactly 64 bytes in size,
// which allows it to fit within a single cache line, and ensures that probes
// can be performed with a single memory load per key.
//
// Groups fill up by appending new entries to the keys and values arrays. When a
// group is full, the probe checks the next group.
//
// https://en.wikipedia.org/wiki/Linear_probing
type table32 struct {
	len     int     // number of entries currently held
	maxLen  int     // number of entries at which the table grows
	maxLoad float64 // configured maximum load factor
	seed    uintptr // per-table hash seed (see randSeed)
	table   []table32Group
}

const table32GroupSize = 7

// table32Group is one 64-byte cache-line-sized bucket; bits is a unary
// occupancy mask ((1<<n)-1 when n slots are used), so the occupied count is
// bits.OnesCount32(bits).
type table32Group struct {
	keys   [table32GroupSize]uint32
	values [table32GroupSize]uint32
	bits   uint32
	_      uint32
}

// makeTable32 validates the load factor, enforces a minimum capacity of one
// group, and initializes the table.
func makeTable32(cap int, maxLoad float64) (t table32) {
	if maxLoad < 0 || maxLoad > 1 {
		panic("max load of probing table must be a value between 0 and 1")
	}
	if cap < table32GroupSize {
		cap = table32GroupSize
	}
	t.init(cap, maxLoad)
	return t
}

// size returns the total number of key slots in the table.
func (t *table32) size() int {
	return table32GroupSize * len(t.table)
}

// init (re)allocates the group array for the requested capacity and picks a
// fresh random hash seed.
func (t *table32) init(cap int, maxLoad float64) {
	size, maxLen := tableSizeAndMaxLen(table32GroupSize, cap, maxLoad)

	*t = table32{
		maxLen:  maxLen,
		maxLoad: maxLoad,
		seed:    randSeed(),
		table:   make([]table32Group, size),
	}
}

// grow rehashes every entry into a new, larger table sized for totalValues,
// preserving each entry's assigned value. Collisions are resolved by linear
// probing on whole groups (hash++ moves to the next group).
func (t *table32) grow(totalValues int) {
	tmp := table32{}
	tmp.init(totalValues, t.maxLoad)
	tmp.len = t.len

	hashes := make([]uintptr, table32GroupSize)
	modulo := uintptr(len(tmp.table)) - 1

	for i := range t.table {
		g := &t.table[i]
		n := bits.OnesCount32(g.bits)

		if aeshash.Enabled() {
			aeshash.MultiHash32(hashes[:n], g.keys[:n], tmp.seed)
		} else {
			wyhash.MultiHash32(hashes[:n], g.keys[:n], tmp.seed)
		}

		for j, hash := range hashes[:n] {
			for {
				group := &tmp.table[hash&modulo]

				if n := bits.OnesCount32(group.bits); n < table32GroupSize {
					group.bits = (group.bits << 1) | 1
					group.keys[n] = g.keys[j]
					group.values[n] = g.values[j]
					break
				}

				hash++
			}
		}
	}

	*t = tmp
}

// reset clears all entries but keeps the allocated group array and seed.
func (t *table32) reset() {
	t.len = 0

	for i := range t.table {
		t.table[i] = table32Group{}
	}
}

func (t *table32) probe(keys []uint32, values []int32) int {
	return t.probeArray(sparse.MakeUint32Array(keys), values)
}

// probeArray looks up (or inserts) each key and writes its assigned index to
// the matching position of values, processing keys in chunks of probesPerLoop
// to bound the stack space used for the hash scratch array. It returns the
// number of keys inserted by this call.
func (t *table32) probeArray(keys sparse.Uint32Array, values []int32) int {
	numKeys := keys.Len()

	if totalValues := t.len + numKeys; totalValues > t.maxLen {
		t.grow(totalValues)
	}

	var hashes [probesPerLoop]uintptr
	var baseLength = t.len
	var useAesHash = aeshash.Enabled()

	// Asserts early that values holds at least numKeys elements (and helps
	// the compiler eliminate bounds checks in the loop below).
	_ = values[:numKeys]

	for i := 0; i < numKeys; {
		j := len(hashes) + i
		n := len(hashes)

		if j > numKeys {
			j = numKeys
			n = numKeys - i
		}

		k := keys.Slice(i, j)
		v := values[i:j:j]
		h := hashes[:n:n]

		if useAesHash {
			aeshash.MultiHashUint32Array(h, k, t.seed)
		} else {
			wyhash.MultiHashUint32Array(h, k, t.seed)
		}

		t.len = multiProbe32(t.table, t.len, h, k, v)
		i = j
	}

	return t.len - baseLength
}

// multiProbe32Default is the portable reference implementation of the probe
// operation; vectorized versions (see hashprobe_amd64.s) must match its
// behavior exactly. numKeys is the current table length and doubles as the
// next value to assign; the updated length is returned.
func multiProbe32Default(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int {
	modulo := uintptr(len(table)) - 1

	for i, hash := range hashes {
		key := keys.Index(i)
		for {
			group := &table[hash&modulo]
			index := table32GroupSize
			value := int32(0)

			for j, k := range group.keys {
				if k == key {
					index = j
					break
				}
			}

			// A match only counts if it landed in an occupied slot; stale or
			// zero keys in unused slots are ignored via the index < n check.
			if n := bits.OnesCount32(group.bits); index < n {
				value = int32(group.values[index])
			} else {
				if n == table32GroupSize {
					hash++ // group full: linear-probe the next group
					continue
				}

				value = int32(numKeys)
				group.bits = (group.bits << 1) | 1
				group.keys[n] = key
				group.values[n] = uint32(value)
				numKeys++
			}

			values[i] = value
			break
		}
	}

	return numKeys
}

// Int64Table is a probing table for int64 keys.
type Int64Table struct{ table64 }

// NewInt64Table constructs a probing table with the given initial capacity and
// maximum load factor.
func NewInt64Table(cap int, maxLoad float64) *Int64Table {
	return &Int64Table{makeTable64(cap, maxLoad)}
}

func (t *Int64Table) Reset() { t.reset() }

func (t *Int64Table) Len() int { return t.len }

func (t *Int64Table) Cap() int { return t.size() }

func (t *Int64Table) Probe(keys []int64, values []int32) int {
	return t.probe(unsafecast.Int64ToUint64(keys), values)
}

func (t *Int64Table) ProbeArray(keys sparse.Int64Array, values []int32) int {
	return t.probeArray(keys.Uint64Array(), values)
}

// Float64Table is a probing table for float64 keys, which are probed by their
// 64 bit representation.
type Float64Table struct{ table64 }

// NewFloat64Table constructs a probing table with the given initial capacity
// and maximum load factor.
func NewFloat64Table(cap int, maxLoad float64) *Float64Table {
	return &Float64Table{makeTable64(cap, maxLoad)}
}

func (t *Float64Table) Reset() { t.reset() }

func (t *Float64Table) Len() int { return t.len }

func (t *Float64Table) Cap() int { return t.size() }

func (t *Float64Table) Probe(keys []float64, values []int32) int {
	return t.probe(unsafecast.Float64ToUint64(keys), values)
}

func (t *Float64Table) ProbeArray(keys sparse.Float64Array, values []int32) int {
	return t.probeArray(keys.Uint64Array(), values)
}

// Uint64Table is a probing table for uint64 keys.
type Uint64Table struct{ table64 }

// NewUint64Table constructs a probing table with the given initial capacity
// and maximum load factor.
func NewUint64Table(cap int, maxLoad float64) *Uint64Table {
	return &Uint64Table{makeTable64(cap, maxLoad)}
}

func (t *Uint64Table) Reset() { t.reset() }

func (t *Uint64Table) Len() int { return t.len }

func (t *Uint64Table) Cap() int { return t.size() }

func (t *Uint64Table) Probe(keys []uint64, values []int32) int {
	return t.probe(keys, values)
}

func (t *Uint64Table) ProbeArray(keys sparse.Uint64Array, values []int32) int {
	return t.probeArray(keys, values)
}

// table64 is the generic implementation of probing tables for 64 bit types.
//
// The table uses a layout similar to the one documented on the table for 32 bit
// keys (see table32). Each group holds up to 4 key/value pairs (instead of 7
// like table32) so that each group fits in a single CPU cache line. This table
// version has a bit lower memory density, with ~23% of table memory being used
// for padding.
//
// Technically we could hold up to 5 entries per group and still fit within the
// 64 bytes of a CPU cache line; on x86 platforms, AVX2 registers can only hold
// four 64 bit values, we would need twice as many instructions per probe if the
// groups were holding 5 values. The trade off of memory for compute efficiency
// appeared to be the right choice at the time.
type table64 struct {
	len     int     // number of entries currently held
	maxLen  int     // number of entries at which the table grows
	maxLoad float64 // configured maximum load factor
	seed    uintptr // per-table hash seed (see randSeed)
	table   []table64Group
}

const table64GroupSize = 4

// table64Group is one 64-byte cache-line-sized bucket; bits is a unary
// occupancy mask ((1<<n)-1 when n slots are used). The trailing padding keeps
// the struct exactly 64 bytes.
type table64Group struct {
	keys   [table64GroupSize]uint64
	values [table64GroupSize]uint32
	bits   uint32
	_      uint32
	_      uint32
	_      uint32
}

// makeTable64 validates the load factor, enforces a minimum capacity of one
// group, and initializes the table.
func makeTable64(cap int, maxLoad float64) (t table64) {
	if maxLoad < 0 || maxLoad > 1 {
		panic("max load of probing table must be a value between 0 and 1")
	}
	if cap < table64GroupSize {
		cap = table64GroupSize
	}
	t.init(cap, maxLoad)
	return t
}

// size returns the total number of key slots in the table.
func (t *table64) size() int {
	return table64GroupSize * len(t.table)
}

// init (re)allocates the group array for the requested capacity and picks a
// fresh random hash seed.
func (t *table64) init(cap int, maxLoad float64) {
	size, maxLen := tableSizeAndMaxLen(table64GroupSize, cap, maxLoad)

	*t = table64{
		maxLen:  maxLen,
		maxLoad: maxLoad,
		seed:    randSeed(),
		table:   make([]table64Group, size),
	}
}

// grow rehashes every entry into a new, larger table sized for totalValues,
// preserving each entry's assigned value. Collisions are resolved by linear
// probing on whole groups (hash++ moves to the next group).
func (t *table64) grow(totalValues int) {
	tmp := table64{}
	tmp.init(totalValues, t.maxLoad)
	tmp.len = t.len

	hashes := make([]uintptr, table64GroupSize)
	modulo := uintptr(len(tmp.table)) - 1

	for i := range t.table {
		g := &t.table[i]
		n := bits.OnesCount32(g.bits)

		if aeshash.Enabled() {
			aeshash.MultiHash64(hashes[:n], g.keys[:n], tmp.seed)
		} else {
			wyhash.MultiHash64(hashes[:n], g.keys[:n], tmp.seed)
		}

		for j, hash := range hashes[:n] {
			for {
				group := &tmp.table[hash&modulo]

				if n := bits.OnesCount32(group.bits); n < table64GroupSize {
					group.bits = (group.bits << 1) | 1
					group.keys[n] = g.keys[j]
					group.values[n] = g.values[j]
					break
				}

				hash++
			}
		}
	}

	*t = tmp
}

// reset clears all entries but keeps the allocated group array and seed.
func (t *table64) reset() {
	t.len = 0

	for i := range t.table {
		t.table[i] = table64Group{}
	}
}

func (t *table64) probe(keys []uint64, values []int32) int {
	return t.probeArray(sparse.MakeUint64Array(keys), values)
}

// probeArray looks up (or inserts) each key and writes its assigned index to
// the matching position of values, processing keys in chunks of probesPerLoop
// to bound the stack space used for the hash scratch array. It returns the
// number of keys inserted by this call.
func (t *table64) probeArray(keys sparse.Uint64Array, values []int32) int {
	numKeys := keys.Len()

	if totalValues := t.len + numKeys; totalValues > t.maxLen {
		t.grow(totalValues)
	}

	var hashes [probesPerLoop]uintptr
	var baseLength = t.len
	var useAesHash = aeshash.Enabled()

	// Asserts early that values holds at least numKeys elements (and helps
	// the compiler eliminate bounds checks in the loop below).
	_ = values[:numKeys]

	for i := 0; i < numKeys; {
		j := len(hashes) + i
		n := len(hashes)

		if j > numKeys {
			j = numKeys
			n = numKeys - i
		}

		k := keys.Slice(i, j)
		v := values[i:j:j]
		h := hashes[:n:n]

		if useAesHash {
			aeshash.MultiHashUint64Array(h, k, t.seed)
		} else {
			wyhash.MultiHashUint64Array(h, k, t.seed)
		}

		t.len = multiProbe64(t.table, t.len, h, k, v)
		i = j
	}

	return t.len - baseLength
}

// multiProbe64Default is the portable reference implementation of the probe
// operation; vectorized versions (see hashprobe_amd64.s) must match its
// behavior exactly. numKeys is the current table length and doubles as the
// next value to assign; the updated length is returned.
func multiProbe64Default(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int {
	modulo := uintptr(len(table)) - 1

	for i, hash := range hashes {
		key := keys.Index(i)
		for {
			group := &table[hash&modulo]
			index := table64GroupSize
			value := int32(0)

			// NOTE: the loop variable deliberately shadows the outer i; the
			// outer index is only needed again at values[i] below.
			for i, k := range group.keys {
				if k == key {
					index = i
					break
				}
			}

			// A match only counts if it landed in an occupied slot; stale or
			// zero keys in unused slots are ignored via the index < n check.
			if n := bits.OnesCount32(group.bits); index < n {
				value = int32(group.values[index])
			} else {
				if n == table64GroupSize {
					hash++ // group full: linear-probe the next group
					continue
				}

				value = int32(numKeys)
				group.bits = (group.bits << 1) | 1
				group.keys[n] = key
				group.values[n] = uint32(value)
				numKeys++
			}

			values[i] = value
			break
		}
	}

	return numKeys
}

// Uint128Table is a probing table for 16-byte keys.
type Uint128Table struct{ table128 }

// NewUint128Table constructs a probing table with the given initial capacity
// and maximum load factor.
func NewUint128Table(cap int, maxLoad float64) *Uint128Table {
	return &Uint128Table{makeTable128(cap, maxLoad)}
}

func (t *Uint128Table) Reset() { t.reset() }

func (t *Uint128Table) Len() int { return t.len }

func (t *Uint128Table) Cap() int { return t.cap }

func (t *Uint128Table) Probe(keys [][16]byte, values []int32) int {
	return t.probe(keys, values)
}

func (t *Uint128Table) ProbeArray(keys sparse.Uint128Array, values []int32) int {
	return t.probeArray(keys, values)
}

// table128 is the generic implementation of probing tables for 128 bit types.
//
// This table uses the following memory layout:
//
//	[key A][key B][...][value A][value B][...]
//
// The table stores values as their actual value plus one, and uses zero as a
// sentinel to determine whether a slot is occupied. A linear probing strategy
// is used to resolve conflicts.
// This approach results in at most two memory
// loads for every four keys being tested, since the location of a key and its
// corresponding value will not be contiguous on the same CPU cache line, but
// a cache line can hold four 16 byte keys.
type table128 struct {
	len     int     // number of entries currently held
	cap     int     // number of key slots (power of two)
	maxLen  int     // number of entries at which the table grows
	maxLoad float64 // configured maximum load factor
	seed    uintptr // per-table hash seed (see randSeed)
	table   []byte  // packed keys followed by packed values (see kv)
}

// makeTable128 validates the load factor, enforces a minimum capacity, and
// initializes the table.
func makeTable128(cap int, maxLoad float64) (t table128) {
	if maxLoad < 0 || maxLoad > 1 {
		panic("max load of probing table must be a value between 0 and 1")
	}
	if cap < 8 {
		cap = 8
	}
	t.init(cap, maxLoad)
	return t
}

// init (re)allocates the backing byte array (16 bytes of key plus 4 bytes of
// value per slot) and picks a fresh random hash seed.
func (t *table128) init(cap int, maxLoad float64) {
	size, maxLen := tableSizeAndMaxLen(1, cap, maxLoad)

	*t = table128{
		cap:     size,
		maxLen:  maxLen,
		maxLoad: maxLoad,
		seed:    randSeed(),
		table:   make([]byte, 16*size+4*size),
	}
}

// kv reinterprets the backing byte array as its two sections: the key slots
// followed by the value slots.
func (t *table128) kv() (keys [][16]byte, values []int32) {
	i := t.cap * 16
	return unsafecast.BytesToUint128(t.table[:i]), unsafecast.BytesToInt32(t.table[i:])
}

// grow rehashes every slot (occupied or not; empty slots carry the zero
// sentinel and remain empty) into a new, larger table sized for totalValues,
// processing slots in chunks of probesPerLoop to bound the hash scratch array.
func (t *table128) grow(totalValues int) {
	tmp := table128{}
	tmp.init(totalValues, t.maxLoad)
	tmp.len = t.len

	keys, values := t.kv()
	hashes := make([]uintptr, probesPerLoop)
	useAesHash := aeshash.Enabled()

	_ = values[:len(keys)]

	for i := 0; i < len(keys); {
		j := len(hashes) + i
		n := len(hashes)

		if j > len(keys) {
			j = len(keys)
			n = len(keys) - i
		}

		h := hashes[:n:n]
		k := keys[i:j:j]
		v := values[i:j:j]

		if useAesHash {
			aeshash.MultiHash128(h, k, tmp.seed)
		} else {
			wyhash.MultiHash128(h, k, tmp.seed)
		}

		tmp.insert(h, k, v)
		i = j
	}

	*t = tmp
}

// insert writes each key/value pair into the first empty slot (stored value
// zero) at or after its hash position, probing linearly. Values are the
// already-offset stored representation (actual value plus one).
func (t *table128) insert(hashes []uintptr, keys [][16]byte, values []int32) {
	tableKeys, tableValues := t.kv()
	modulo := uintptr(t.cap) - 1

	for i, hash := range hashes {
		for {
			j := hash & modulo
			v := tableValues[j]

			if v == 0 {
				tableKeys[j] = keys[i]
				tableValues[j] = values[i]
				break
			}

			hash++
		}
	}
}

// reset clears all entries but keeps the allocated backing array and seed.
func (t *table128) reset() {
	t.len = 0

	for i := range t.table {
		t.table[i] = 0
	}
}

func (t *table128) probe(keys [][16]byte, values []int32) int {
	return t.probeArray(sparse.MakeUint128Array(keys), values)
}

// probeArray looks up (or inserts) each key and writes its assigned index to
// the matching position of values, processing keys in chunks of probesPerLoop
// to bound the stack space used for the hash scratch array. It returns the
// number of keys inserted by this call.
func (t *table128) probeArray(keys sparse.Uint128Array, values []int32) int {
	numKeys := keys.Len()

	if totalValues := t.len + numKeys; totalValues > t.maxLen {
		t.grow(totalValues)
	}

	var hashes [probesPerLoop]uintptr
	var baseLength = t.len
	var useAesHash = aeshash.Enabled()

	// Asserts early that values holds at least numKeys elements (and helps
	// the compiler eliminate bounds checks in the loop below).
	_ = values[:numKeys]

	for i := 0; i < numKeys; {
		j := len(hashes) + i
		n := len(hashes)

		if j > numKeys {
			j = numKeys
			n = numKeys - i
		}

		k := keys.Slice(i, j)
		v := values[i:j:j]
		h := hashes[:n:n]

		if useAesHash {
			aeshash.MultiHashUint128Array(h, k, t.seed)
		} else {
			wyhash.MultiHashUint128Array(h, k, t.seed)
		}

		t.len = multiProbe128(t.table, t.cap, t.len, h, k, v)
		i = j
	}

	return t.len - baseLength
}

// multiProbe128Default is the portable reference implementation of the probe
// operation; the SSE2 version (see hashprobe_amd64.s) must match its behavior
// exactly. Stored values are offset by one so that zero marks an empty slot;
// the caller-visible value written to values[i] is the unoffset index.
func multiProbe128Default(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int {
	modulo := uintptr(tableCap) - 1
	offset := uintptr(tableCap) * 16
	tableKeys := unsafecast.BytesToUint128(table[:offset])
	tableValues := unsafecast.BytesToInt32(table[offset:])

	for i, hash := range hashes {
		key := keys.Index(i)
		for {
			j := hash & modulo
			v := tableValues[j]

			if v == 0 {
				// Empty slot: insert, storing tableLen+1 so the slot reads
				// as occupied.
				values[i] = int32(tableLen)
				tableLen++
				tableKeys[j] = key
				tableValues[j] = int32(tableLen)
				break
			}

			if key == tableKeys[j] {
				values[i] = v - 1 // undo the +1 storage offset
				break
			}

			hash++
		}
	}

	return tableLen
}

================================================
FILE: hashprobe/hashprobe_amd64.go
================================================
//go:build !purego

package hashprobe

import (
	"github.com/segmentio/parquet-go/sparse"
	"golang.org/x/sys/cpu"
)

//go:noescape
func multiProbe32AVX2(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int

//go:noescape
func multiProbe64AVX2(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int

//go:noescape
func multiProbe128SSE2(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int

// multiProbe32 dispatches to the AVX2 assembly implementation when available,
// falling back to the portable reference implementation otherwise.
func multiProbe32(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int {
	if cpu.X86.HasAVX2 {
		return multiProbe32AVX2(table, numKeys, hashes, keys, values)
	}
	return multiProbe32Default(table, numKeys, hashes, keys, values)
}

// multiProbe64 dispatches to the AVX2 assembly implementation when available,
// falling back to the portable reference implementation otherwise.
func multiProbe64(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int {
	if cpu.X86.HasAVX2 {
		return multiProbe64AVX2(table, numKeys, hashes, keys, values)
	}
	return multiProbe64Default(table, numKeys, hashes, keys, values)
}

// multiProbe128 dispatches to the SSE2 assembly implementation when available,
// falling back to the portable reference implementation otherwise.
func multiProbe128(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int {
	if cpu.X86.HasSSE2 {
		return multiProbe128SSE2(table, tableCap, tableLen, hashes, keys, values)
	}
	return multiProbe128Default(table, tableCap, tableLen, hashes, keys, values)
}

================================================
FILE: hashprobe/hashprobe_amd64.s
================================================
//go:build !purego

#include "textflag.h"

// This version of the probing algorithm for 32 bit keys takes advantage of
// the memory layout of table groups and SIMD instructions to accelerate the
// probing operations.
//
// The first 32 bytes of a table group contain the bit mask indicating which
// slots are in use, and the array of keys, which fits into a single vector
// register (YMM) and can be loaded and tested with a single instruction.
//
// A first version of the table group used the number of keys held in the
// group instead of a bit mask, which required the probing operation to
// reconstruct the bit mask during the lookup operation in order to identify
// which elements of the VPCMPEQD result should be retained. The extra CPU
// instructions used to reconstruct the bit mask had a measurable overhead.
// By holding the bit mask in the data structure, we can determine the number
// of keys in a group using the POPCNT instruction, and avoid recomputing the
// mask during lookups.
// // func multiProbe32AVX2(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int TEXT ·multiProbe32AVX2(SB), NOSPLIT, $0-112 MOVQ table_base+0(FP), AX MOVQ table_len+8(FP), BX MOVQ numKeys+24(FP), CX MOVQ hashes_base+32(FP), DX MOVQ hashes_len+40(FP), DI MOVQ keys_array_ptr+56(FP), R8 MOVQ keys_array_off+72(FP), R15 MOVQ values_base+80(FP), R9 DECQ BX // modulo = len(table) - 1 XORQ SI, SI JMP test loop: MOVQ (DX)(SI*8), R10 // hash VPBROADCASTD (R8), Y0 // [key] probe: MOVQ R10, R11 ANDQ BX, R11 // hash & modulo SHLQ $6, R11 // x 64 (size of table32Group) LEAQ (AX)(R11*1), R12 VMOVDQU (R12), Y1 VPCMPEQD Y0, Y1, Y2 VMOVMSKPS Y2, R11 MOVL 56(R12), R13 TESTL R11, R13 JZ insert TZCNTL R11, R13 MOVL 28(R12)(R13*4), R14 next: MOVL R14, (R9)(SI*4) INCQ SI ADDQ R15, R8 test: CMPQ SI, DI JNE loop MOVQ CX, ret+104(FP) VZEROUPPER RET insert: CMPL R13, $0b1111111 JE probeNextGroup MOVL R13, R11 POPCNTL R13, R13 MOVQ X0, R14 // key SHLL $1, R11 ORL $1, R11 MOVL R11, 56(R12) // group.len = (group.len << 1) | 1 MOVL R14, (R12)(R13*4) // group.keys[i] = key MOVL CX, 28(R12)(R13*4) // group.values[i] = value MOVL CX, R14 INCL CX JMP next probeNextGroup: INCQ R10 JMP probe // func multiProbe64AVX2(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int TEXT ·multiProbe64AVX2(SB), NOSPLIT, $0-112 MOVQ table_base+0(FP), AX MOVQ table_len+8(FP), BX MOVQ numKeys+24(FP), CX MOVQ hashes_base+32(FP), DX MOVQ hashes_len+40(FP), DI MOVQ keys_array_ptr+56(FP), R8 MOVQ keys_array_off+72(FP), R15 MOVQ values_base+80(FP), R9 DECQ BX // modulo = len(table) - 1 XORQ SI, SI JMP test loop: MOVQ (DX)(SI*8), R10 // hash VPBROADCASTQ (R8), Y0 // [key] probe: MOVQ R10, R11 ANDQ BX, R11 // hash & modulo SHLQ $6, R11 // x 64 (size of table64Group) LEAQ (AX)(R11*1), R12 VMOVDQU (R12), Y1 VPCMPEQQ Y0, Y1, Y2 VMOVMSKPD Y2, R11 MOVL 48(R12), R13 TESTL R11, R13 JZ insert TZCNTL R11, R13 MOVL 32(R12)(R13*4), R14 next: MOVL 
R14, (R9)(SI*4) INCQ SI ADDQ R15, R8 test: CMPQ SI, DI JNE loop MOVQ CX, ret+104(FP) VZEROUPPER RET insert: CMPL R13, $0b1111 JE probeNextGroup MOVL R13, R11 POPCNTL R13, R13 SHLL $1, R11 ORL $1, R11 MOVL R11, 48(R12) // group.len = (group.len << 1) | 1 MOVQ X0, (R12)(R13*8) // group.keys[i] = key MOVL CX, 32(R12)(R13*4) // group.values[i] = value MOVL CX, R14 INCL CX JMP next probeNextGroup: INCQ R10 JMP probe // func multiProbe128SSE2(table []byte, tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int TEXT ·multiProbe128SSE2(SB), NOSPLIT, $0-120 MOVQ table_base+0(FP), AX MOVQ tableCap+24(FP), BX MOVQ tableLen+32(FP), CX MOVQ hashes_base+40(FP), DX MOVQ hashes_len+48(FP), DI MOVQ keys_array_ptr+64(FP), R8 MOVQ keys_array_off+80(FP), R15 MOVQ values_base+88(FP), R9 MOVQ BX, R10 SHLQ $4, R10 LEAQ (AX)(R10*1), R10 DECQ BX // modulo = tableCap - 1 XORQ SI, SI JMP test loop: MOVQ (DX)(SI*8), R11 // hash MOVOU (R8), X0 // key probe: MOVQ R11, R12 ANDQ BX, R12 MOVL (R10)(R12*4), R14 CMPL R14, $0 JE insert SHLQ $4, R12 MOVOU (AX)(R12*1), X1 PCMPEQL X0, X1 MOVMSKPS X1, R13 CMPL R13, $0b1111 JE next INCQ R11 JMP probe next: DECL R14 MOVL R14, (R9)(SI*4) INCQ SI ADDQ R15, R8 test: CMPQ SI, DI JNE loop MOVQ CX, ret+112(FP) RET insert: INCL CX MOVL CX, (R10)(R12*4) MOVL CX, R14 SHLQ $4, R12 MOVOU X0, (AX)(R12*1) JMP next ================================================ FILE: hashprobe/hashprobe_purego.go ================================================ //go:build purego || !amd64 package hashprobe import ( "github.com/segmentio/parquet-go/sparse" ) func multiProbe32(table []table32Group, numKeys int, hashes []uintptr, keys sparse.Uint32Array, values []int32) int { return multiProbe32Default(table, numKeys, hashes, keys, values) } func multiProbe64(table []table64Group, numKeys int, hashes []uintptr, keys sparse.Uint64Array, values []int32) int { return multiProbe64Default(table, numKeys, hashes, keys, values) } func multiProbe128(table []byte, 
tableCap, tableLen int, hashes []uintptr, keys sparse.Uint128Array, values []int32) int { return multiProbe128Default(table, tableCap, tableLen, hashes, keys, values) } ================================================ FILE: hashprobe/hashprobe_test.go ================================================ package hashprobe import ( "encoding/binary" "fmt" "math/rand" "testing" "time" "unsafe" ) func TestTable32GroupSize(t *testing.T) { if n := unsafe.Sizeof(table32Group{}); n != 64 { t.Errorf("size of 32 bit table group is not 64 bytes: %d", n) } } func TestUint32TableProbeOneByOne(t *testing.T) { const N = 500 table := NewUint32Table(0, 0.9) for n := 0; n < 2; n++ { // Do two passes, both should behave the same. for i := 1; i <= N; i++ { k := [1]uint32{} v := [1]int32{} k[0] = uint32(i) table.Probe(k[:], v[:]) if v[0] != int32(i-1) { t.Errorf("wrong value probed for key=%d: want=%d got=%d", i, i-1, v[0]) } } } } func TestUint32TableProbeBulk(t *testing.T) { const N = 999 table := NewUint32Table(0, 0.9) k := make([]uint32, N) v := make([]int32, N) for i := range k { k[i] = uint32(i) } for n := 0; n < 2; n++ { table.Probe(k, v) for i := range v { if v[i] != int32(i) { t.Errorf("wrong value probed for key=%d: want=%d got=%d", k[i], i, v[i]) } } if t.Failed() { break } for i := range v { v[i] = 0 } } } func TestTable64GroupSize(t *testing.T) { if n := unsafe.Sizeof(table64Group{}); n != 64 { t.Errorf("size of 64 bit table group is not 64 bytes: %d", n) } } func TestUint64TableProbeOneByOne(t *testing.T) { const N = 500 table := NewUint64Table(0, 0.9) for n := 0; n < 2; n++ { // Do two passes, both should behave the same. 
for i := 1; i <= N; i++ { k := [1]uint64{} v := [1]int32{} k[0] = uint64(i) table.Probe(k[:], v[:]) if v[0] != int32(i-1) { t.Errorf("wrong value probed for key=%d: want=%d got=%d", i, i-1, v[0]) } } } } func TestUint64TableProbeBulk(t *testing.T) { const N = 999 table := NewUint64Table(0, 0.9) k := make([]uint64, N) v := make([]int32, N) for i := range k { k[i] = uint64(i) } for n := 0; n < 2; n++ { table.Probe(k, v) for i := range v { if v[i] != int32(i) { t.Errorf("wrong value probed for key=%d: want=%d got=%d", k[i], i, v[i]) } } if t.Failed() { break } for i := range v { v[i] = 0 } } } func TestUint128TableProbeOneByOne(t *testing.T) { const N = 500 table := NewUint128Table(0, 0.9) for n := 0; n < 2; n++ { // Do two passes, both should behave the same. for i := 1; i <= N; i++ { k := [1][16]byte{} v := [1]int32{} binary.LittleEndian.PutUint64(k[0][:8], uint64(i)) table.Probe(k[:], v[:]) if v[0] != int32(i-1) { t.Errorf("wrong value probed for key=%x: want=%d got=%d", i, i-1, v[0]) } } } } func TestUint128TableProbeBulk(t *testing.T) { const N = 999 table := NewUint128Table(0, 0.9) k := make([][16]byte, N) v := make([]int32, N) for i := range k { binary.LittleEndian.PutUint64(k[i][:8], uint64(i)) } for n := 0; n < 2; n++ { table.Probe(k, v) for i := range v { if v[i] != int32(i) { t.Errorf("wrong value probed for key=%x: want=%d got=%d", k[i], i, v[i]) } } if t.Failed() { break } for i := range v { v[i] = 0 } } } const ( benchmarkProbesPerLoop = 500 benchmarkMaxLoad = 0.9 ) type uint32Table interface { Reset() Len() int Probe([]uint32, []int32) int } type uint32Map map[uint32]int32 func (m uint32Map) Reset() { for k := range m { delete(m, k) } } func (m uint32Map) Len() int { return len(m) } func (m uint32Map) Probe(keys []uint32, values []int32) (n int) { _ = values[:len(keys)] for i, k := range keys { v, ok := m[k] if !ok { v = int32(len(m)) m[k] = v n++ } values[i] = v } return n } func BenchmarkUint32Table(b *testing.B) { benchmarkUint32Table(b, func(size 
int) uint32Table { return NewUint32Table(size, benchmarkMaxLoad) }) } func BenchmarkGoUint32Map(b *testing.B) { benchmarkUint32Table(b, func(size int) uint32Table { return make(uint32Map, size) }) } func benchmarkUint32Table(b *testing.B, newTable func(size int) uint32Table) { for n := 100; n <= 1e6; n *= 10 { table := newTable(0) keys, values := generateUint32Table(n) b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { benchmarkUint32Loop(b, table.Probe, keys, values) }) } } func benchmarkUint32Loop(b *testing.B, f func([]uint32, []int32) int, keys []uint32, values []int32) { i := 0 j := benchmarkProbesPerLoop b.SetBytes(4 * int64(benchmarkProbesPerLoop)) _ = keys[:len(values)] _ = values[:len(keys)] start := time.Now() for k := 0; k < b.N; k++ { if j > len(keys) { j = len(keys) } f(keys[i:j:j], values[i:j:j]) if j == len(keys) { i, j = 0, benchmarkProbesPerLoop } else { i, j = j, j+benchmarkProbesPerLoop } } seconds := time.Since(start).Seconds() b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s") } func generateUint32Table(n int) ([]uint32, []int32) { prng := rand.New(rand.NewSource(int64(n))) keys := make([]uint32, n) values := make([]int32, n) for i := range keys { keys[i] = prng.Uint32() } return keys, values } type uint64Table interface { Reset() Len() int Probe([]uint64, []int32) int } type uint64Map map[uint64]int32 func (m uint64Map) Reset() { for k := range m { delete(m, k) } } func (m uint64Map) Len() int { return len(m) } func (m uint64Map) Probe(keys []uint64, values []int32) (n int) { _ = values[:len(keys)] for i, k := range keys { v, ok := m[k] if !ok { v = int32(len(m)) m[k] = v n++ } values[i] = v } return n } func BenchmarkUint64Table(b *testing.B) { benchmarkUint64Table(b, func(size int) uint64Table { return NewUint64Table(size, benchmarkMaxLoad) }) } func BenchmarkGoUint64Map(b *testing.B) { benchmarkUint64Table(b, func(size int) uint64Table { return make(uint64Map, size) }) } func benchmarkUint64Table(b *testing.B, newTable 
func(size int) uint64Table) { for n := 100; n <= 1e6; n *= 10 { table := newTable(0) keys, values := generateUint64Table(n) b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { benchmarkUint64Loop(b, table.Probe, keys, values) }) } } func benchmarkUint64Loop(b *testing.B, f func([]uint64, []int32) int, keys []uint64, values []int32) { i := 0 j := benchmarkProbesPerLoop b.SetBytes(8 * int64(benchmarkProbesPerLoop)) _ = keys[:len(values)] _ = values[:len(keys)] start := time.Now() for k := 0; k < b.N; k++ { if j > len(keys) { j = len(keys) } f(keys[i:j:j], values[i:j:j]) if j == len(keys) { i, j = 0, benchmarkProbesPerLoop } else { i, j = j, j+benchmarkProbesPerLoop } } seconds := time.Since(start).Seconds() b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s") } func generateUint64Table(n int) ([]uint64, []int32) { prng := rand.New(rand.NewSource(int64(n))) keys := make([]uint64, n) values := make([]int32, n) for i := range keys { keys[i] = prng.Uint64() } return keys, values } type uint128Table interface { Reset() Len() int Probe([][16]byte, []int32) int } type uint128Map map[[16]byte]int32 func (m uint128Map) Reset() { for k := range m { delete(m, k) } } func (m uint128Map) Len() int { return len(m) } func (m uint128Map) Probe(keys [][16]byte, values []int32) (n int) { _ = values[:len(keys)] for i, k := range keys { v, ok := m[k] if !ok { v = int32(len(m)) m[k] = v n++ } values[i] = v } return n } func BenchmarkUint128Table(b *testing.B) { benchmarkUint128Table(b, func(size int) uint128Table { return NewUint128Table(size, benchmarkMaxLoad) }) } func BenchmarkGoUint128Map(b *testing.B) { benchmarkUint128Table(b, func(size int) uint128Table { return make(uint128Map, size) }) } func benchmarkUint128Table(b *testing.B, newTable func(size int) uint128Table) { for n := 100; n <= 1e6; n *= 10 { table := newTable(0) keys, values := generateUint128Table(n) b.Run(fmt.Sprintf("N=%d", n), func(b *testing.B) { benchmarkUint128Loop(b, table.Probe, keys, values) 
}) } } func benchmarkUint128Loop(b *testing.B, f func([][16]byte, []int32) int, keys [][16]byte, values []int32) { i := 0 j := benchmarkProbesPerLoop b.SetBytes(16 * int64(benchmarkProbesPerLoop)) _ = keys[:len(values)] _ = values[:len(keys)] start := time.Now() for k := 0; k < b.N; k++ { if j > len(keys) { j = len(keys) } f(keys[i:j:j], values[i:j:j]) if j == len(keys) { i, j = 0, benchmarkProbesPerLoop } else { i, j = j, j+benchmarkProbesPerLoop } } seconds := time.Since(start).Seconds() b.ReportMetric(float64(benchmarkProbesPerLoop*b.N)/seconds, "probe/s") } func generateUint128Table(n int) ([][16]byte, []int32) { prng := rand.New(rand.NewSource(int64(n))) keys := make([][16]byte, n) values := make([]int32, n) for i := range keys { prng.Read(keys[i][:]) } return keys, values } ================================================ FILE: hashprobe/wyhash/wyhash.go ================================================ // Package wyhash implements a hashing algorithm derived from the Go runtime's // internal hashing fallback, which uses a variation of the wyhash algorithm. 
package wyhash

import (
	"encoding/binary"
	"math/bits"

	"github.com/segmentio/parquet-go/sparse"
)

// Multiplicative constants of the wyhash algorithm (also mirrored as #define
// macros in wyhash_amd64.s).
const (
	m1 = 0xa0761d6478bd642f
	m2 = 0xe7037ed1a0b428db
	m3 = 0x8ebc6af09c88c6e3
	m4 = 0x589965cc75374cc3
	m5 = 0x1d8e4e27c47d124f
)

// mix multiplies a and b as 128-bit values and folds the result by xor-ing
// the high and low 64-bit halves together.
func mix(a, b uint64) uint64 {
	hi, lo := bits.Mul64(a, b)
	return hi ^ lo
}

// Hash32 hashes a single 32-bit value with the given seed. The m5^4 term
// folds the input length (4 bytes) into the final mix, matching the other
// fixed-size hash functions below.
func Hash32(value uint32, seed uintptr) uintptr {
	return uintptr(mix(m5^4, mix(uint64(value)^m2, uint64(value)^uint64(seed)^m1)))
}

// Hash64 hashes a single 64-bit value with the given seed (length term m5^8).
func Hash64(value uint64, seed uintptr) uintptr {
	return uintptr(mix(m5^8, mix(value^m2, value^uint64(seed)^m1)))
}

// Hash128 hashes a 16-byte value with the given seed; the two 64-bit halves
// are read in little-endian order (length term m5^16).
func Hash128(value [16]byte, seed uintptr) uintptr {
	a := binary.LittleEndian.Uint64(value[:8])
	b := binary.LittleEndian.Uint64(value[8:])
	return uintptr(mix(m5^16, mix(a^m2, b^uint64(seed)^m1)))
}

// MultiHash32 hashes each element of values into the matching slot of hashes.
func MultiHash32(hashes []uintptr, values []uint32, seed uintptr) {
	MultiHashUint32Array(hashes, sparse.MakeUint32Array(values), seed)
}

// MultiHash64 hashes each element of values into the matching slot of hashes.
func MultiHash64(hashes []uintptr, values []uint64, seed uintptr) {
	MultiHashUint64Array(hashes, sparse.MakeUint64Array(values), seed)
}

// MultiHash128 hashes each element of values into the matching slot of hashes.
func MultiHash128(hashes []uintptr, values [][16]byte, seed uintptr) {
	MultiHashUint128Array(hashes, sparse.MakeUint128Array(values), seed)
}

================================================
FILE: hashprobe/wyhash/wyhash_amd64.go
================================================
//go:build !purego

package wyhash

import "github.com/segmentio/parquet-go/sparse"

// Assembly implementations; see wyhash_amd64.s.

//go:noescape
func MultiHashUint32Array(hashes []uintptr, values sparse.Uint32Array, seed uintptr)

//go:noescape
func MultiHashUint64Array(hashes []uintptr, values sparse.Uint64Array, seed uintptr)

//go:noescape
func MultiHashUint128Array(hashes []uintptr, values sparse.Uint128Array, seed uintptr)

================================================
FILE: hashprobe/wyhash/wyhash_amd64.s
================================================
//go:build !purego

#include "textflag.h"

#define m1 0xa0761d6478bd642f
#define m2 0xe7037ed1a0b428db
#define m3 0x8ebc6af09c88c6e3
#define m4 0x589965cc75374cc3
#define m5 0x1d8e4e27c47d124f

// func MultiHashUint32Array(hashes []uintptr, values sparse.Uint32Array, seed uintptr)
//
// Scalar loop matching Hash32 in wyhash.go. Register use:
//   R12 = &hashes[0], R13 = values pointer, R14 = values length,
//   R15 = byte stride between consecutive values (sparse array offset),
//   R8  = m1^seed, R9 = m2, R10 = m5^4 (length term), SI = loop index.
TEXT ·MultiHashUint32Array(SB), NOSPLIT, $0-56
	MOVQ hashes_base+0(FP), R12
	MOVQ values_array_ptr+24(FP), R13
	MOVQ values_array_len+32(FP), R14
	MOVQ values_array_off+40(FP), R15
	MOVQ seed+48(FP), R11
	MOVQ $m1, R8
	MOVQ $m2, R9
	MOVQ $m5^4, R10
	XORQ R11, R8 // R8 = m1^seed
	XORQ SI, SI
	JMP test
loop:
	MOVL (R13), AX
	MOVQ R8, BX
	XORQ AX, BX // BX = value^seed^m1
	XORQ R9, AX // AX = value^m2
	MULQ BX     // DX:AX = (value^m2) * (value^seed^m1)
	XORQ DX, AX // AX = mix of the two halves
	MULQ R10    // DX:AX = AX * (m5^4)
	XORQ DX, AX // AX = final mix, as in Hash32
	MOVQ AX, (R12)(SI*8)
	INCQ SI
	ADDQ R15, R13 // advance by the array stride
test:
	CMPQ SI, R14
	JNE loop
	RET

// func MultiHashUint64Array(hashes []uintptr, values sparse.Uint64Array, seed uintptr)
//
// Same as MultiHashUint32Array but loads a full 8-byte value per iteration
// and uses the m5^8 length term (matches Hash64).
TEXT ·MultiHashUint64Array(SB), NOSPLIT, $0-56
	MOVQ hashes_base+0(FP), R12
	MOVQ values_array_ptr+24(FP), R13
	MOVQ values_array_len+32(FP), R14
	MOVQ values_array_off+40(FP), R15
	MOVQ seed+48(FP), R11
	MOVQ $m1, R8
	MOVQ $m2, R9
	MOVQ $m5^8, R10
	XORQ R11, R8
	XORQ SI, SI
	JMP test
loop:
	MOVQ (R13), AX
	MOVQ R8, BX
	XORQ AX, BX
	XORQ R9, AX
	MULQ BX
	XORQ DX, AX
	MULQ R10
	XORQ DX, AX
	MOVQ AX, (R12)(SI*8)
	INCQ SI
	ADDQ R15, R13
test:
	CMPQ SI, R14
	JNE loop
	RET

// func MultiHashUint128Array(hashes []uintptr, values sparse.Uint128Array, seed uintptr)
//
// 16-byte keys: the low half (AX) is mixed with m2 and the high half (DX)
// with seed^m1, then folded with the m5^16 length term (matches Hash128).
TEXT ·MultiHashUint128Array(SB), NOSPLIT, $0-56
	MOVQ hashes_base+0(FP), R12
	MOVQ values_array_ptr+24(FP), R13
	MOVQ values_array_len+32(FP), R14
	MOVQ values_array_off+40(FP), R15
	MOVQ seed+48(FP), R11
	MOVQ $m1, R8
	MOVQ $m2, R9
	MOVQ $m5^16, R10
	XORQ R11, R8
	XORQ SI, SI
	JMP test
loop:
	MOVQ 0(R13), AX
	MOVQ 8(R13), DX
	MOVQ R8, BX
	XORQ DX, BX // BX = hi^seed^m1
	XORQ R9, AX // AX = lo^m2
	MULQ BX
	XORQ DX, AX
	MULQ R10
	XORQ DX, AX
	MOVQ AX, (R12)(SI*8)
	INCQ SI
	ADDQ R15, R13
test:
	CMPQ SI, R14
	JNE loop
	RET

================================================
FILE: hashprobe/wyhash/wyhash_purego.go
================================================
//go:build purego || !amd64

package wyhash

import "github.com/segmentio/parquet-go/sparse"

// MultiHashUint32Array is the portable fallback for the assembly version.
func MultiHashUint32Array(hashes []uintptr, values sparse.Uint32Array, seed uintptr) {
	for i := range hashes {
		hashes[i] =
Hash32(values.Index(i), seed)
	}
}

// MultiHashUint64Array is the portable fallback for the assembly version.
func MultiHashUint64Array(hashes []uintptr, values sparse.Uint64Array, seed uintptr) {
	for i := range hashes {
		hashes[i] = Hash64(values.Index(i), seed)
	}
}

// MultiHashUint128Array is the portable fallback for the assembly version.
func MultiHashUint128Array(hashes []uintptr, values sparse.Uint128Array, seed uintptr) {
	for i := range hashes {
		hashes[i] = Hash128(values.Index(i), seed)
	}
}

================================================
FILE: hashprobe/wyhash/wyhash_test.go
================================================
package wyhash

import (
	"encoding/binary"
	"math/rand"
	"testing"
	"time"
)

// TestHash32 pins the hash of a known input so the algorithm cannot change
// silently across implementations.
func TestHash32(t *testing.T) {
	if h := Hash32(42, 1); h != 0xda93b6f668a0496e {
		t.Errorf("hash mismatch: %08x", h)
	}
}

// TestMultiHash32 verifies the batch API agrees with the scalar Hash32,
// which also exercises the assembly path on amd64 builds.
func TestMultiHash32(t *testing.T) {
	const N = 10
	hashes := [N]uintptr{}
	values := [N]uint32{}
	seed := uintptr(32)
	for i := range values {
		values[i] = uint32(i)
	}
	MultiHash32(hashes[:], values[:], seed)
	for i := range values {
		h := Hash32(values[i], seed)
		if h != hashes[i] {
			t.Errorf("hash(%d): want=%08x got=%08x", values[i], h, hashes[i])
		}
	}
}

func BenchmarkHash32(b *testing.B) {
	b.SetBytes(8)
	value := rand.Uint32()
	// Feed each hash back as the next input so the chain cannot be
	// optimized away.
	benchmarkHashThroughput(b, func(seed uintptr) int {
		value = uint32(Hash32(value, seed))
		return 1
	})
}

func BenchmarkMultiHash32(b *testing.B) {
	hashes := [512]uintptr{}
	values := [512]uint32{}
	b.SetBytes(4 * int64(len(hashes)))
	benchmarkHashThroughput(b, func(seed uintptr) int {
		MultiHash32(hashes[:], values[:], seed)
		return len(hashes)
	})
}

func TestHash64(t *testing.T) {
	if h := Hash64(42, 1); h != 0x6e69a6ede6b5a25e {
		t.Errorf("hash mismatch: %016x", h)
	}
}

func TestMultiHash64(t *testing.T) {
	const N = 10
	hashes := [N]uintptr{}
	values := [N]uint64{}
	seed := uintptr(64)
	for i := range values {
		values[i] = uint64(i)
	}
	MultiHash64(hashes[:], values[:], seed)
	for i := range values {
		h := Hash64(values[i], seed)
		if h != hashes[i] {
			t.Errorf("hash(%d): want=%016x got=%016x", values[i], h, hashes[i])
		}
	}
}

func BenchmarkHash64(b *testing.B) {
	b.SetBytes(8)
	value := rand.Uint64()
	benchmarkHashThroughput(b, func(seed uintptr) int {
		value = uint64(Hash64(value, seed))
		return 1
	})
}

func BenchmarkMultiHash64(b *testing.B) {
	hashes := [512]uintptr{}
	values := [512]uint64{}
	b.SetBytes(8 * int64(len(hashes)))
	benchmarkHashThroughput(b, func(seed uintptr) int {
		MultiHash64(hashes[:], values[:], seed)
		return len(hashes)
	})
}

func TestHash128(t *testing.T) {
	if h := Hash128([16]byte{0: 42}, 1); h != 0xcd09fcdae9a79e7c {
		t.Errorf("hash mismatch: %016x", h)
	}
}

func TestMultiHash128(t *testing.T) {
	const N = 10
	hashes := [N]uintptr{}
	values := [N][16]byte{}
	seed := uintptr(64)
	for i := range values {
		binary.LittleEndian.PutUint64(values[i][:8], uint64(i))
	}
	MultiHash128(hashes[:], values[:], seed)
	for i := range values {
		h := Hash128(values[i], seed)
		if h != hashes[i] {
			t.Errorf("hash(%d): want=%016x got=%016x", values[i], h, hashes[i])
		}
	}
}

func BenchmarkHash128(b *testing.B) {
	b.SetBytes(8)
	hash := uintptr(0)
	value := [16]byte{}
	binary.LittleEndian.PutUint64(value[:8], rand.Uint64())
	binary.LittleEndian.PutUint64(value[8:], rand.Uint64())
	benchmarkHashThroughput(b, func(seed uintptr) int {
		hash = Hash128(value, seed)
		return 1
	})
	_ = hash // sink so the result is observed
}

func BenchmarkMultiHash128(b *testing.B) {
	hashes := [512]uintptr{}
	values := [512][16]byte{}
	b.SetBytes(16 * int64(len(hashes)))
	benchmarkHashThroughput(b, func(seed uintptr) int {
		MultiHash128(hashes[:], values[:], seed)
		return len(hashes)
	})
}

// benchmarkHashThroughput calls f once per iteration with a varying seed and
// reports the aggregate hashing rate as a custom "hash/s" metric; f returns
// the number of hashes it produced.
func benchmarkHashThroughput(b *testing.B, f func(seed uintptr) int) {
	hashes := int64(0)
	start := time.Now()
	for i := 0; i < b.N; i++ {
		hashes += int64(f(uintptr(i)))
	}
	seconds := time.Since(start).Seconds()
	b.ReportMetric(float64(hashes)/seconds, "hash/s")
}

================================================
FILE: internal/bitpack/bitpack.go
================================================
// Package bitpack implements efficient bit packing and unpacking routines for
// integers of various bit widths.
package bitpack // ByteCount returns the number of bytes needed to hold the given bit count. func ByteCount(bitCount uint) int { return int((bitCount + 7) / 8) } ================================================ FILE: internal/bitpack/masks_int32_amd64.s ================================================ //go:build !purego #include "textflag.h" // ----------------------------------------------------------------------------- // Shuffle masks used to broadcast bytes of bit-packed valued into vector // registers at positions where they can then be shifted into the right // locations. // ----------------------------------------------------------------------------- // Shuffle masks for unpacking values from bit widths 1 to 16. // // The masks are grouped in 32 bytes chunks containing 2 masks of 16 bytes, with // the following layout: // // - The first mask is used to shuffle values from the 16 bytes of input into // the lower 16 bytes of output. These values are then shifted RIGHT to be // aligned on the begining of each 32 bit word. // // - The second mask selects values from the 16 bytes of input into the upper // 16 bytes of output. These values are then shifted RIGHT to be aligned on // the beginning of each 32 bit word. // // The bit width is intended to be used as an index into this array, using this // formula to convert from the index to a byte offset: // // offset = 32 * (bitWidth - 1) // GLOBL ·shuffleInt32x1to16bits(SB), RODATA|NOPTR, $512 // 1 bit => 32 bits // ----------------- // 0: [a,b,c,d,e,f,g,h] // ... 
DATA ·shuffleInt32x1to16bits+0+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+4(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+8(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+12(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+16(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+20(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+24(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+0+28(SB)/4, $0x80808000 // 2 bits => 32 bits // ----------------- // 0: [a,a,b,b,c,c,d,d] // 1: [e,e,f,f,g,g,h,h] // ... DATA ·shuffleInt32x1to16bits+32+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+32+4(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+32+8(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+32+12(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+32+16(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+32+20(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+32+24(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+32+28(SB)/4, $0x80808001 // 3 bits => 32 bits // ----------------- // 0: [a,a,a,b,b,b,c,c] // 1: [c,d,d,d,e,e,e,f] // 2: [f,f,g,g,g,h,h,h] // ... DATA ·shuffleInt32x1to16bits+64+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+64+4(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+64+8(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+64+12(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+64+16(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+64+20(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+64+24(SB)/4, $0x80808002 DATA ·shuffleInt32x1to16bits+64+28(SB)/4, $0x80808002 // 4 bits => 32 bits // ----------------- // 0: [a,a,a,a,b,b,b,b] // 1: [c,c,c,c,d,d,d,d] // 2: [e,e,e,e,f,f,f,f] // 3: [g,g,g,g,h,h,h,h] // ... 
DATA ·shuffleInt32x1to16bits+96+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+96+4(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+96+8(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+96+12(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+96+16(SB)/4, $0x80808002 DATA ·shuffleInt32x1to16bits+96+20(SB)/4, $0x80808002 DATA ·shuffleInt32x1to16bits+96+24(SB)/4, $0x80808003 DATA ·shuffleInt32x1to16bits+96+28(SB)/4, $0x80808003 // 5 bits => 32 bits // ----------------- // 0: [a,a,a,a,a,b,b,b] // 1: [b,b,c,c,c,c,c,d] // 2: [d,d,d,d,e,e,e,e] // 3: [e,f,f,f,f,f,g,g] // 4: [g,g,g,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+128+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+128+4(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+128+8(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+128+12(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+128+16(SB)/4, $0x80800302 DATA ·shuffleInt32x1to16bits+128+20(SB)/4, $0x80808003 DATA ·shuffleInt32x1to16bits+128+24(SB)/4, $0x80800403 DATA ·shuffleInt32x1to16bits+128+28(SB)/4, $0x80808004 // 6 bits => 32 bits // ----------------- // 0: [a,a,a,a,a,a,b,b] // 1: [b,b,b,b,c,c,c,c] // 2: [c,c,d,d,d,d,d,d] // 3: [e,e,e,e,e,e,f,f] // 4: [f,f,f,f,g,g,g,g] // 5: [g,g,h,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+160+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+160+4(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+160+8(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+160+12(SB)/4, $0x80808002 DATA ·shuffleInt32x1to16bits+160+16(SB)/4, $0x80808003 DATA ·shuffleInt32x1to16bits+160+20(SB)/4, $0x80800403 DATA ·shuffleInt32x1to16bits+160+24(SB)/4, $0x80800504 DATA ·shuffleInt32x1to16bits+160+28(SB)/4, $0x80808005 // 7 bits => 32 bits // ----------------- // 0: [a,a,a,a,a,a,a,b] // 1: [b,b,b,b,b,b,c,c] // 2: [c,c,c,c,c,d,d,d] // 3: [d,d,d,d,e,e,e,e] // 4: [e,e,e,f,f,f,f,f] // 5: [f,f,g,g,g,g,g,g] // 6: [g,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x1to16bits+192+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+192+4(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+192+8(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+192+12(SB)/4, $0x80800302 DATA ·shuffleInt32x1to16bits+192+16(SB)/4, $0x80800403 DATA ·shuffleInt32x1to16bits+192+20(SB)/4, $0x80800504 DATA ·shuffleInt32x1to16bits+192+24(SB)/4, $0x80800605 DATA ·shuffleInt32x1to16bits+192+28(SB)/4, $0x80808006 // 8 bits => 32 bits // ----------------- // 0: [a,a,a,a,a,a,a,a] // 1: [b,b,b,b,b,b,b,b] // 2: [c,c,c,c,c,c,c,c] // 3: [d,d,d,d,d,d,d,d] // 4: [e,e,e,e,e,e,e,e] // 5: [f,f,f,f,f,f,f,f] // 6: [g,g,g,g,g,g,g,g] // 7: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+224+0(SB)/4, $0x80808000 DATA ·shuffleInt32x1to16bits+224+4(SB)/4, $0x80808001 DATA ·shuffleInt32x1to16bits+224+8(SB)/4, $0x80808002 DATA ·shuffleInt32x1to16bits+224+12(SB)/4, $0x80808003 DATA ·shuffleInt32x1to16bits+224+16(SB)/4, $0x80808004 DATA ·shuffleInt32x1to16bits+224+20(SB)/4, $0x80808005 DATA ·shuffleInt32x1to16bits+224+24(SB)/4, $0x80808006 DATA ·shuffleInt32x1to16bits+224+28(SB)/4, $0x80808007 // 9 bits => 32 bits // ----------------- // 0: [a,a,a,a,a,a,a,a] // 1: [a,b,b,b,b,b,b,b] // 2: [b,b,c,c,c,c,c,c] // 3: [c,c,c,d,d,d,d,d] // 4: [d,d,d,d,e,e,e,e] // 5: [e,e,e,e,e,f,f,f] // 6: [f,f,f,f,f,f,g,g] // 7: [g,g,g,g,g,g,g,h] // 8: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x1to16bits+256+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+256+4(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+256+8(SB)/4, $0x80800302 DATA ·shuffleInt32x1to16bits+256+12(SB)/4, $0x80800403 DATA ·shuffleInt32x1to16bits+256+16(SB)/4, $0x80800504 DATA ·shuffleInt32x1to16bits+256+20(SB)/4, $0x80800605 DATA ·shuffleInt32x1to16bits+256+24(SB)/4, $0x80800706 DATA ·shuffleInt32x1to16bits+256+28(SB)/4, $0x80800807 // 10 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,b,b,b,b,b,b] // 2: [b,b,b,b,c,c,c,c] // 3: [c,c,c,c,c,c,d,d] // 4: [d,d,d,d,d,d,d,d] // 5: [e,e,e,e,e,e,e,e] // 6: [e,e,f,f,f,f,f,f] // 7: [f,f,f,f,g,g,g,g] // 8: [g,g,g,g,g,g,h,h] // 9: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+288+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+288+4(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+288+8(SB)/4, $0x80800302 DATA ·shuffleInt32x1to16bits+288+12(SB)/4, $0x80800403 DATA ·shuffleInt32x1to16bits+288+16(SB)/4, $0x80800605 DATA ·shuffleInt32x1to16bits+288+20(SB)/4, $0x80800706 DATA ·shuffleInt32x1to16bits+288+24(SB)/4, $0x80800807 DATA ·shuffleInt32x1to16bits+288+28(SB)/4, $0x80800908 // 11 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,b,b,b,b,b] // 2: [b,b,b,b,b,b,c,c] // 3: [c,c,c,c,c,c,c,c] // 4: [c,d,d,d,d,d,d,d] // 5: [d,d,d,d,e,e,e,e] // 6: [e,e,e,e,e,e,e,f] // 7: [f,f,f,f,f,f,f,f] // 8: [f,f,g,g,g,g,g,g] // 9: [g,g,g,g,g,h,h,h] // A: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x1to16bits+320+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+320+4(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+320+8(SB)/4, $0x80040302 DATA ·shuffleInt32x1to16bits+320+12(SB)/4, $0x80800504 DATA ·shuffleInt32x1to16bits+320+16(SB)/4, $0x80800605 DATA ·shuffleInt32x1to16bits+320+20(SB)/4, $0x80080706 DATA ·shuffleInt32x1to16bits+320+24(SB)/4, $0x80800908 DATA ·shuffleInt32x1to16bits+320+28(SB)/4, $0x80800A09 // 12 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,b,b,b,b] // 2: [b,b,b,b,b,b,b,b] // 3: [c,c,c,c,c,c,c,c] // 4: [c,c,c,c,d,d,d,d] // 5: [d,d,d,d,d,d,d,d] // 6: [e,e,e,e,e,e,e,e] // 7: [e,e,e,e,f,f,f,f] // 8: [f,f,f,f,f,f,f,f] // 9: [g,g,g,g,g,g,g,g] // A: [g,g,g,g,h,h,h,h] // B: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+352+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+352+4(SB)/4, $0x80800201 DATA ·shuffleInt32x1to16bits+352+8(SB)/4, $0x80080403 DATA ·shuffleInt32x1to16bits+352+12(SB)/4, $0x80800504 DATA ·shuffleInt32x1to16bits+352+16(SB)/4, $0x80800706 DATA ·shuffleInt32x1to16bits+352+20(SB)/4, $0x80800807 DATA ·shuffleInt32x1to16bits+352+24(SB)/4, $0x80800A09 DATA ·shuffleInt32x1to16bits+352+28(SB)/4, $0x80800B0A // 13 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,b,b,b] // 2: [b,b,b,b,b,b,b,b] // 3: [b,b,c,c,c,c,c,c] // 4: [c,c,c,c,c,c,c,d] // 5: [d,d,d,d,d,d,d,d] // 6: [d,d,d,d,e,e,e,e] // 7: [e,e,e,e,e,e,e,e] // 8: [e,f,f,f,f,f,f,f] // 9: [f,f,f,f,f,f,g,g] // A: [g,g,g,g,g,g,g,g] // B: [g,g,g,h,h,h,h,h] // C: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x1to16bits+384+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+384+4(SB)/4, $0x80030201 DATA ·shuffleInt32x1to16bits+384+8(SB)/4, $0x80800403 DATA ·shuffleInt32x1to16bits+384+12(SB)/4, $0x80060504 DATA ·shuffleInt32x1to16bits+384+16(SB)/4, $0x80080706 DATA ·shuffleInt32x1to16bits+384+20(SB)/4, $0x80800908 DATA ·shuffleInt32x1to16bits+384+24(SB)/4, $0x800B0A09 DATA ·shuffleInt32x1to16bits+384+28(SB)/4, $0x80800C0B // 14 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,b,b] // 2: [b,b,b,b,b,b,b,b] // 3: [b,b,b,b,c,c,c,c] // 4: [c,c,c,c,c,c,c,c] // 5: [c,c,d,d,d,d,d,d] // 6: [d,d,d,d,d,d,d,d] // 7: [e,e,e,e,e,e,e,e] // 8: [e,e,e,e,e,e,f,f] // 9: [f,f,f,f,f,f,f,f] // A: [f,f,f,f,g,g,g,g] // B: [g,g,g,g,g,g,g,g] // C: [g,g,h,h,h,h,h,h] // D: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+416+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+416+4(SB)/4, $0x80030201 DATA ·shuffleInt32x1to16bits+416+8(SB)/4, $0x80050403 DATA ·shuffleInt32x1to16bits+416+12(SB)/4, $0x80800605 DATA ·shuffleInt32x1to16bits+416+16(SB)/4, $0x80080807 DATA ·shuffleInt32x1to16bits+416+20(SB)/4, $0x800A0908 DATA ·shuffleInt32x1to16bits+416+24(SB)/4, $0x800C0B0A DATA ·shuffleInt32x1to16bits+416+28(SB)/4, $0x80800D0C // 15 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,b] // 2: [b,b,b,b,b,b,b,b] // 3: [b,b,b,b,b,b,c,c] // 4: [c,c,c,c,c,c,c,c] // 5: [c,c,c,c,c,d,d,d] // 6: [d,d,d,d,d,d,d,d] // 7: [d,d,d,d,e,e,e,e] // 8: [e,e,e,e,e,e,e,e] // 9: [e,e,e,f,f,f,f,f] // A: [f,f,f,f,f,f,f,f] // B: [f,f,g,g,g,g,g,g] // C: [g,g,g,g,g,g,g,g] // D: [g,h,h,h,h,h,h,h] // E: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x1to16bits+448+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+448+4(SB)/4, $0x80030201 DATA ·shuffleInt32x1to16bits+448+8(SB)/4, $0x80050403 DATA ·shuffleInt32x1to16bits+448+12(SB)/4, $0x80070605 DATA ·shuffleInt32x1to16bits+448+16(SB)/4, $0x80090807 DATA ·shuffleInt32x1to16bits+448+20(SB)/4, $0x800B0A09 DATA ·shuffleInt32x1to16bits+448+24(SB)/4, $0x800D0C0B DATA ·shuffleInt32x1to16bits+448+28(SB)/4, $0x80800E0D // 16 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [b,b,b,b,b,b,b,b] // 3: [b,b,b,b,b,b,c,b] // 4: [c,c,c,c,c,c,c,c] // 5: [c,c,c,c,c,c,c,c] // 6: [d,d,d,d,d,d,d,d] // 7: [d,d,d,d,d,d,d,d] // 8: [e,e,e,e,e,e,e,e] // 9: [e,e,e,e,e,e,e,e] // A: [f,f,f,f,f,f,f,f] // B: [f,f,f,f,f,f,f,f] // C: [g,g,g,g,g,g,g,g] // D: [g,g,g,g,g,g,g,g] // E: [h,h,h,h,h,h,h,h] // F: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x1to16bits+480+0(SB)/4, $0x80800100 DATA ·shuffleInt32x1to16bits+480+4(SB)/4, $0x80800302 DATA ·shuffleInt32x1to16bits+480+8(SB)/4, $0x80800504 DATA ·shuffleInt32x1to16bits+480+12(SB)/4, $0x80800706 DATA ·shuffleInt32x1to16bits+480+16(SB)/4, $0x80800908 DATA ·shuffleInt32x1to16bits+480+20(SB)/4, $0x80800B0A DATA ·shuffleInt32x1to16bits+480+24(SB)/4, $0x80800D0C DATA ·shuffleInt32x1to16bits+480+28(SB)/4, $0x80800F0E // Shuffle masks for unpacking values from bit widths 17 to 26. // // The masks are grouped in 48 bytes chunks containing 3 masks of 16 bytes, with // the following layout: // // - The first mask is used to shuffle values from the first 16 bytes of input // into the lower 16 bytes of output. These values are then shifted RIGHT to // be aligned on the begining of each 32 bit word. // // - The second mask selects values from the first 16 bytes of input into the // upper 16 bytes of output. These values are then shifted RIGHT to be aligned // on the beginning of each 32 bit word. 
// // - The third mask selects values from the second 16 bytes of input into the // upper 16 bytes of output. These values are then shifted RIGHT to be aligned // on the beginning of each 32 bit word. // // The bit width is intended to be used as an index into this array, using this // formula to convert from the index to a byte offset: // // offset = 48 * (bitWidth - 17) // GLOBL ·shuffleInt32x17to26bits(SB), RODATA|NOPTR, $480 // 17 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,b,b,b,b,b,b,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,c,c,c,c,c,c] // 5: [c,c,c,c,c,c,c,c] // 6: [c,c,c,d,d,d,d,d] // 7: [d,d,d,d,d,d,d,d] // 8: [d,d,d,d,e,e,e,e] // 9: [e,e,e,e,e,e,e,e] // A: [e,e,e,e,e,f,f,f] // B: [f,f,f,f,f,f,f,f] // C: [f,f,f,f,f,f,g,g] // D: [g,g,g,g,g,g,g,g] // E: [g,g,g,g,g,g,g,h] // F: [h,h,h,h,h,h,h,h] // --- // 0: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x17to26bits+0+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+0+4(SB)/4, $0x80040302 DATA ·shuffleInt32x17to26bits+0+8(SB)/4, $0x80060504 DATA ·shuffleInt32x17to26bits+0+12(SB)/4, $0x80080706 DATA ·shuffleInt32x17to26bits+0+16(SB)/4, $0x800A0908 DATA ·shuffleInt32x17to26bits+0+20(SB)/4, $0x800C0B0A DATA ·shuffleInt32x17to26bits+0+24(SB)/4, $0x800E0D0C DATA ·shuffleInt32x17to26bits+0+28(SB)/4, $0x80800F0E DATA ·shuffleInt32x17to26bits+0+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+0+36(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+0+40(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+0+44(SB)/4, $0x80008080 // 18 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,b,b,b,b,b,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,c,c,c,c] // 5: [c,c,c,c,c,c,c,c] // 6: [c,c,c,c,c,c,d,d] // 7: [d,d,d,d,d,d,d,d] // 8: [d,d,d,d,d,d,d,d] // 9: [e,e,e,e,e,e,e,e] // A: [e,e,e,e,e,e,e,e] // B: [e,e,f,f,f,f,f,f] // C: [f,f,f,f,f,f,f,f] // D: [f,f,f,f,g,g,g,g] // E: [g,g,g,g,g,g,g,g] // F: [g,g,g,g,g,g,h,h] // --- // 0: 
[h,h,h,h,h,h,h,h] // 1: [h,h,h,h,h,h,h,h] // ... DATA ·shuffleInt32x17to26bits+48+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+48+4(SB)/4, $0x80040302 DATA ·shuffleInt32x17to26bits+48+8(SB)/4, $0x80060504 DATA ·shuffleInt32x17to26bits+48+12(SB)/4, $0x80080706 DATA ·shuffleInt32x17to26bits+48+16(SB)/4, $0x800B0A09 DATA ·shuffleInt32x17to26bits+48+20(SB)/4, $0x800D0C0B DATA ·shuffleInt32x17to26bits+48+24(SB)/4, $0x800F0E0D DATA ·shuffleInt32x17to26bits+48+28(SB)/4, $0x8080800F DATA ·shuffleInt32x17to26bits+48+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+48+36(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+48+40(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+48+44(SB)/4, $0x80010080 // 19 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,b,b,b,b,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,c,c] // 5: [c,c,c,c,c,c,c,c] // 6: [c,c,c,c,c,c,c,c] // 7: [c,d,d,d,d,d,d,d] // 8: [d,d,d,d,d,d,d,d] // 9: [d,d,d,d,e,e,e,e] // A: [e,e,e,e,e,e,e,e] // B: [e,e,e,e,e,e,e,f] // C: [f,f,f,f,f,f,f,f] // D: [f,f,f,f,f,f,f,f] // E: [f,f,g,g,g,g,g,g] // F: [g,g,g,g,g,g,g,g] // --- // 0: [g,g,g,g,g,h,h,h] // 1: [h,h,h,h,h,h,h,h] // 2: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+96+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+96+4(SB)/4, $0x80040302 DATA ·shuffleInt32x17to26bits+96+8(SB)/4, $0x07060504 DATA ·shuffleInt32x17to26bits+96+12(SB)/4, $0x80090807 DATA ·shuffleInt32x17to26bits+96+16(SB)/4, $0x800B0A09 DATA ·shuffleInt32x17to26bits+96+20(SB)/4, $0x0E0D0C0B DATA ·shuffleInt32x17to26bits+96+24(SB)/4, $0x80800F0E DATA ·shuffleInt32x17to26bits+96+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+96+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+96+36(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+96+40(SB)/4, $0x80008080 DATA ·shuffleInt32x17to26bits+96+44(SB)/4, $0x80020100 // 20 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,b,b,b,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [c,c,c,c,c,c,c,c] // 6: [c,c,c,c,c,c,c,c] // 7: [c,c,c,c,d,d,d,d] // 8: [d,d,d,d,d,d,d,d] // 9: [d,d,d,d,d,d,d,d] // A: [e,e,e,e,e,e,e,e] // B: [e,e,e,e,e,e,e,e] // C: [e,e,e,e,f,f,f,f] // D: [f,f,f,f,f,f,f,f] // E: [f,f,f,f,f,f,f,f] // F: [g,g,g,g,g,g,g,g] // --- // 0: [g,g,g,g,g,g,g,g] // 1: [g,g,g,g,h,h,h,h] // 2: [h,h,h,h,h,h,h,h] // 3: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+144+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+144+4(SB)/4, $0x80040302 DATA ·shuffleInt32x17to26bits+144+8(SB)/4, $0x80070605 DATA ·shuffleInt32x17to26bits+144+12(SB)/4, $0x80090807 DATA ·shuffleInt32x17to26bits+144+16(SB)/4, $0x800C0B0A DATA ·shuffleInt32x17to26bits+144+20(SB)/4, $0x800E0D0C DATA ·shuffleInt32x17to26bits+144+24(SB)/4, $0x8080800F DATA ·shuffleInt32x17to26bits+144+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+144+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+144+36(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+144+40(SB)/4, $0x80010080 DATA ·shuffleInt32x17to26bits+144+44(SB)/4, $0x80030201 // 21 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,a,b,b,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [b,b,c,c,c,c,c,c] // 6: [c,c,c,c,c,c,c,c] // 7: [c,c,c,c,c,c,c,d] // 8: [d,d,d,d,d,d,d,d] // 9: [d,d,d,d,d,d,d,d] // A: [d,d,d,d,e,e,e,e] // B: [e,e,e,e,e,e,e,e] // C: [e,e,e,e,e,e,e,e] // D: [e,f,f,f,f,f,f,f] // E: [f,f,f,f,f,f,f,f] // F: [f,f,f,f,f,f,g,g] // --- // 0: [g,g,g,g,g,g,g,g] // 1: [g,g,g,g,g,g,g,g] // 2: [g,g,g,h,h,h,h,h] // 3: [h,h,h,h,h,h,h,h] // 4: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+192+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+192+4(SB)/4, $0x05040302 DATA ·shuffleInt32x17to26bits+192+8(SB)/4, $0x80070605 DATA ·shuffleInt32x17to26bits+192+12(SB)/4, $0x0A090807 DATA ·shuffleInt32x17to26bits+192+16(SB)/4, $0x0D0C0B0A DATA ·shuffleInt32x17to26bits+192+20(SB)/4, $0x800F0E0D DATA ·shuffleInt32x17to26bits+192+24(SB)/4, $0x8080800F DATA ·shuffleInt32x17to26bits+192+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+192+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+192+36(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+192+40(SB)/4, $0x02010080 DATA ·shuffleInt32x17to26bits+192+44(SB)/4, $0x80040302 // 22 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,a,a,b,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [b,b,b,b,c,c,c,c] // 6: [c,c,c,c,c,c,c,c] // 7: [c,c,c,c,c,c,c,c] // 8: [c,c,d,d,d,d,d,d] // 9: [d,d,d,d,d,d,d,d] // A: [d,d,d,d,d,d,d,d] // B: [e,e,e,e,e,e,e,e] // C: [e,e,e,e,e,e,e,e] // D: [e,e,e,e,e,e,f,f] // E: [f,f,f,f,f,f,f,f] // F: [f,f,f,f,f,f,f,f] // --- // 0: [f,f,f,f,g,g,g,g] // 1: [g,g,g,g,g,g,g,g] // 2: [g,g,g,g,g,g,g,g] // 3: [g,g,h,h,h,h,h,h] // 4: [h,h,h,h,h,h,h,h] // 5: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+240+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+240+4(SB)/4, $0x05040302 DATA ·shuffleInt32x17to26bits+240+8(SB)/4, $0x08070605 DATA ·shuffleInt32x17to26bits+240+12(SB)/4, $0x800A0908 DATA ·shuffleInt32x17to26bits+240+16(SB)/4, $0x800D0C0B DATA ·shuffleInt32x17to26bits+240+20(SB)/4, $0x800F0E0D DATA ·shuffleInt32x17to26bits+240+24(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+240+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+240+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+240+36(SB)/4, $0x00808080 DATA ·shuffleInt32x17to26bits+240+40(SB)/4, $0x03020100 DATA ·shuffleInt32x17to26bits+240+44(SB)/4, $0x80050403 // 23 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,a,a,a,b] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [b,b,b,b,b,b,c,c] // 6: [c,c,c,c,c,c,c,c] // 7: [c,c,c,c,c,c,c,c] // 8: [c,c,c,c,c,d,d,d] // 9: [d,d,d,d,d,d,d,d] // A: [d,d,d,d,d,d,d,d] // B: [d,d,d,d,e,e,e,e] // C: [e,e,e,e,e,e,e,e] // D: [e,e,e,e,e,e,e,e] // E: [e,e,e,f,f,f,f,f] // F: [f,f,f,f,f,f,f,f] // --- // 0: [f,f,f,f,f,f,f,f] // 1: [f,f,g,g,g,g,g,g] // 2: [g,g,g,g,g,g,g,g] // 3: [g,g,g,g,g,g,g,g] // 4: [g,h,h,h,h,h,h,h] // 5: [h,h,h,h,h,h,h,h] // 6: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+288+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+288+4(SB)/4, $0x05040302 DATA ·shuffleInt32x17to26bits+288+8(SB)/4, $0x08070605 DATA ·shuffleInt32x17to26bits+288+12(SB)/4, $0x0B0A0908 DATA ·shuffleInt32x17to26bits+288+16(SB)/4, $0x0E0D0C0B DATA ·shuffleInt32x17to26bits+288+20(SB)/4, $0x80800F0E DATA ·shuffleInt32x17to26bits+288+24(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+288+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+288+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+288+36(SB)/4, $0x01008080 DATA ·shuffleInt32x17to26bits+288+40(SB)/4, $0x04030201 DATA ·shuffleInt32x17to26bits+288+44(SB)/4, $0x80060504 // 24 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,a,a,a,a] // 3: [b,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [b,b,b,b,b,b,b,b] // 6: [c,c,c,c,c,c,c,c] // 7: [c,c,c,c,c,c,c,c] // 8: [c,c,c,c,c,c,c,c] // 9: [d,d,d,d,d,d,d,d] // A: [d,d,d,d,d,d,d,d] // B: [d,d,d,d,d,d,d,d] // C: [e,e,e,e,e,e,e,e] // D: [e,e,e,e,e,e,e,e] // E: [e,e,e,e,e,e,e,e] // F: [f,f,f,f,f,f,f,f] // --- // 0: [f,f,f,f,f,f,f,f] // 1: [f,f,f,f,f,f,f,f] // 2: [g,g,g,g,g,g,g,g] // 3: [g,g,g,g,g,g,g,g] // 4: [g,g,g,g,g,g,g,g] // 5: [h,h,h,h,h,h,h,h] // 6: [h,h,h,h,h,h,h,h] // 7: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+336+0(SB)/4, $0x80020100 DATA ·shuffleInt32x17to26bits+336+4(SB)/4, $0x80050403 DATA ·shuffleInt32x17to26bits+336+8(SB)/4, $0x80080706 DATA ·shuffleInt32x17to26bits+336+12(SB)/4, $0x800B0A09 DATA ·shuffleInt32x17to26bits+336+16(SB)/4, $0x800E0D0C DATA ·shuffleInt32x17to26bits+336+20(SB)/4, $0x8080800F DATA ·shuffleInt32x17to26bits+336+24(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+336+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+336+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+336+36(SB)/4, $0x80010080 DATA ·shuffleInt32x17to26bits+336+40(SB)/4, $0x80040302 DATA ·shuffleInt32x17to26bits+336+44(SB)/4, $0x80070605 // 25 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,a,a,a,a] // 3: [a,b,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [b,b,b,b,b,b,b,b] // 6: [b,b,c,c,c,c,c,c] // 7: [c,c,c,c,c,c,c,c] // 8: [c,c,c,c,c,c,c,c] // 9: [c,c,c,d,d,d,d,d] // A: [d,d,d,d,d,d,d,d] // B: [d,d,d,d,d,d,d,d] // C: [d,d,d,d,e,e,e,e] // D: [e,e,e,e,e,e,e,e] // E: [e,e,e,e,e,e,e,e] // F: [e,e,e,e,e,f,f,f] // --- // 0: [f,f,f,f,f,f,f,f] // 1: [f,f,f,f,f,f,f,f] // 2: [f,f,f,f,f,f,g,g] // 3: [g,g,g,g,g,g,g,g] // 4: [g,g,g,g,g,g,g,g] // 5: [g,g,g,g,g,g,g,h] // 6: [h,h,h,h,h,h,h,h] // 7: [h,h,h,h,h,h,h,h] // 8: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+384+0(SB)/4, $0x03020100 DATA ·shuffleInt32x17to26bits+384+4(SB)/4, $0x06050403 DATA ·shuffleInt32x17to26bits+384+8(SB)/4, $0x09080706 DATA ·shuffleInt32x17to26bits+384+12(SB)/4, $0x0C0B0A09 DATA ·shuffleInt32x17to26bits+384+16(SB)/4, $0x0F0E0D0C DATA ·shuffleInt32x17to26bits+384+20(SB)/4, $0x8080800F DATA ·shuffleInt32x17to26bits+384+24(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+384+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+384+32(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+384+36(SB)/4, $0x02010080 DATA ·shuffleInt32x17to26bits+384+40(SB)/4, $0x05040302 DATA ·shuffleInt32x17to26bits+384+44(SB)/4, $0x08070605 // 26 bits => 32 bits // ------------------ // 0: [a,a,a,a,a,a,a,a] // 1: [a,a,a,a,a,a,a,a] // 2: [a,a,a,a,a,a,a,a] // 3: [a,a,b,b,b,b,b,b] // 4: [b,b,b,b,b,b,b,b] // 5: [b,b,b,b,b,b,b,b] // 6: [b,b,b,b,c,c,c,c] // 7: [c,c,c,c,c,c,c,c] // 8: [c,c,c,c,c,c,c,c] // 9: [c,c,c,c,c,c,d,d] // A: [d,d,d,d,d,d,d,d] // B: [d,d,d,d,d,d,d,d] // C: [d,d,d,d,d,d,d,d] // D: [e,e,e,e,e,e,e,e] // E: [e,e,e,e,e,e,e,e] // F: [e,e,e,e,e,e,e,e] // --- // 0: [e,e,f,f,f,f,f,f] // 1: [f,f,f,f,f,f,f,f] // 2: [f,f,f,f,f,f,f,f] // 3: [f,f,f,f,g,g,g,g] // 4: [g,g,g,g,g,g,g,g] // 5: [g,g,g,g,g,g,g,g] // 6: [g,g,g,g,g,g,h,h] // 7: [h,h,h,h,h,h,h,h] // 8: [h,h,h,h,h,h,h,h] // 9: [h,h,h,h,h,h,h,h] // ... 
DATA ·shuffleInt32x17to26bits+432+0(SB)/4, $0x03020100 DATA ·shuffleInt32x17to26bits+432+4(SB)/4, $0x06050403 DATA ·shuffleInt32x17to26bits+432+8(SB)/4, $0x09080706 DATA ·shuffleInt32x17to26bits+432+12(SB)/4, $0x0C0B0A09 DATA ·shuffleInt32x17to26bits+432+16(SB)/4, $0x800F0E0D DATA ·shuffleInt32x17to26bits+432+20(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+432+24(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+432+28(SB)/4, $0x80808080 DATA ·shuffleInt32x17to26bits+432+32(SB)/4, $0x00808080 DATA ·shuffleInt32x17to26bits+432+36(SB)/4, $0x03020100 DATA ·shuffleInt32x17to26bits+432+40(SB)/4, $0x06050403 DATA ·shuffleInt32x17to26bits+432+44(SB)/4, $0x09080706 // Shuffle masks for unpacking values from bit widths 27 to 31. // // The masks are grouped in 80 bytes chunks containing 5 masks of 16 bytes, with // the following layout: // // - The first mask is used to shuffle values from the first 16 bytes of input // into the lower 16 bytes of output. These values are then shifted RIGHT to // be aligned on the begining of each 32 bit word. // // - The second mask is used to shuffle upper bits of bit-packed values of the // first 16 bytes of input that spanned across 5 bytes. These extra bits cannot // be selected by the first mask (which can select at most 4 bytes per word). // The extra bits are then shifted LEFT to be positioned at the end of the // words, after the bits extracted by the first mask. // // - The third mask selects values from the first 16 bytes of input into the // upper 16 bytes of output. These values are then shifted RIGHT to be aligned // on the beginning of each 32 bit word. // // - The fourth mask selects values from the second 16 bytes of input into the // upper 16 bytes of output. These values are then shifted RIGHT to be aligned // on the beginning of each 32 bit word. // // - The fifth mask is used to shuffle upper bits of bit-packed values values of // second 16 bytes of input that spanned across 5 bytes. 
These values are then
// shifted LEFT so that they land in the high bits of their 32 bit word, after
// the bits extracted by the fourth mask (this mirrors the role the second mask
// plays for the lower half of the output).
//
// The bit width is intended to be used as an index into this array, using this
// formula to convert from the index to a byte offset:
//
//	offset = 80 * (bitWidth - 27)
//
GLOBL ·shuffleInt32x27to31bits(SB), RODATA|NOPTR, $400

// 27 bits => 32 bits (offset 0)
DATA ·shuffleInt32x27to31bits+0+0(SB)/4, $0x03020100
DATA ·shuffleInt32x27to31bits+0+4(SB)/4, $0x06050403
DATA ·shuffleInt32x27to31bits+0+8(SB)/4, $0x09080706
DATA ·shuffleInt32x27to31bits+0+12(SB)/4, $0x0D0C0B0A
DATA ·shuffleInt32x27to31bits+0+16(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+20(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+24(SB)/4, $0x0A808080
DATA ·shuffleInt32x27to31bits+0+28(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+32(SB)/4, $0x800F0E0D
DATA ·shuffleInt32x27to31bits+0+36(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+40(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+44(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+48(SB)/4, $0x00808080
DATA ·shuffleInt32x27to31bits+0+52(SB)/4, $0x03020100
DATA ·shuffleInt32x27to31bits+0+56(SB)/4, $0x07060504
DATA ·shuffleInt32x27to31bits+0+60(SB)/4, $0x0A090807
DATA ·shuffleInt32x27to31bits+0+64(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+68(SB)/4, $0x04808080
DATA ·shuffleInt32x27to31bits+0+72(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+0+76(SB)/4, $0x80808080

// 28 bits => 32 bits (offset 80)
DATA ·shuffleInt32x27to31bits+80+0(SB)/4, $0x03020100
DATA ·shuffleInt32x27to31bits+80+4(SB)/4, $0x06050403
DATA ·shuffleInt32x27to31bits+80+8(SB)/4, $0x0A090807
DATA ·shuffleInt32x27to31bits+80+12(SB)/4, $0x0D0C0B0A
DATA ·shuffleInt32x27to31bits+80+16(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+20(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+24(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+28(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+32(SB)/4, $0x80800F0E
DATA ·shuffleInt32x27to31bits+80+36(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+40(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+44(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+48(SB)/4, $0x01008080
DATA ·shuffleInt32x27to31bits+80+52(SB)/4, $0x04030201
DATA ·shuffleInt32x27to31bits+80+56(SB)/4, $0x08070605
DATA ·shuffleInt32x27to31bits+80+60(SB)/4, $0x0B0A0908
DATA ·shuffleInt32x27to31bits+80+64(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+68(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+72(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+80+76(SB)/4, $0x80808080

// 29 bits => 32 bits (offset 160)
DATA ·shuffleInt32x27to31bits+160+0(SB)/4, $0x03020100
DATA ·shuffleInt32x27to31bits+160+4(SB)/4, $0x06050403
DATA ·shuffleInt32x27to31bits+160+8(SB)/4, $0x0A090807
DATA ·shuffleInt32x27to31bits+160+12(SB)/4, $0x0D0C0B0A
DATA ·shuffleInt32x27to31bits+160+16(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+160+20(SB)/4, $0x07808080
DATA ·shuffleInt32x27to31bits+160+24(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+160+28(SB)/4, $0x0E808080
DATA ·shuffleInt32x27to31bits+160+32(SB)/4, $0x80800F0E
DATA ·shuffleInt32x27to31bits+160+36(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+160+40(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+160+44(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+160+48(SB)/4, $0x01008080
DATA ·shuffleInt32x27to31bits+160+52(SB)/4, $0x05040302
DATA ·shuffleInt32x27to31bits+160+56(SB)/4, $0x08070605
DATA ·shuffleInt32x27to31bits+160+60(SB)/4, $0x0C0B0A09
DATA ·shuffleInt32x27to31bits+160+64(SB)/4, $0x02808080
DATA ·shuffleInt32x27to31bits+160+68(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+160+72(SB)/4, $0x09808080
DATA ·shuffleInt32x27to31bits+160+76(SB)/4, $0x80808080

// 30 bits => 32 bits (offset 240)
DATA ·shuffleInt32x27to31bits+240+0(SB)/4, $0x03020100
DATA ·shuffleInt32x27to31bits+240+4(SB)/4, $0x06050403
DATA ·shuffleInt32x27to31bits+240+8(SB)/4, $0x0A090807
DATA ·shuffleInt32x27to31bits+240+12(SB)/4, $0x0E0D0C0B
DATA ·shuffleInt32x27to31bits+240+16(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+240+20(SB)/4, $0x07808080
DATA ·shuffleInt32x27to31bits+240+24(SB)/4, $0x0B808080
DATA ·shuffleInt32x27to31bits+240+28(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+240+32(SB)/4, $0x8080800F
DATA ·shuffleInt32x27to31bits+240+36(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+240+40(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+240+44(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+240+48(SB)/4, $0x02010080
DATA ·shuffleInt32x27to31bits+240+52(SB)/4, $0x05040302
DATA ·shuffleInt32x27to31bits+240+56(SB)/4, $0x09080706
DATA ·shuffleInt32x27to31bits+240+60(SB)/4, $0x0D0C0B0A
DATA ·shuffleInt32x27to31bits+240+64(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+240+68(SB)/4, $0x06808080
DATA ·shuffleInt32x27to31bits+240+72(SB)/4, $0x0A808080
DATA ·shuffleInt32x27to31bits+240+76(SB)/4, $0x80808080

// 31 bits => 32 bits (offset 320)
DATA ·shuffleInt32x27to31bits+320+0(SB)/4, $0x03020100
DATA ·shuffleInt32x27to31bits+320+4(SB)/4, $0x06050403
DATA ·shuffleInt32x27to31bits+320+8(SB)/4, $0x0A090807
DATA ·shuffleInt32x27to31bits+320+12(SB)/4, $0x0E0D0C0B
DATA ·shuffleInt32x27to31bits+320+16(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+320+20(SB)/4, $0x07808080
DATA ·shuffleInt32x27to31bits+320+24(SB)/4, $0x0B808080
DATA ·shuffleInt32x27to31bits+320+28(SB)/4, $0x0F808080
DATA ·shuffleInt32x27to31bits+320+32(SB)/4, $0x8080800F
DATA ·shuffleInt32x27to31bits+320+36(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+320+40(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+320+44(SB)/4, $0x80808080
DATA ·shuffleInt32x27to31bits+320+48(SB)/4, $0x02010080
DATA ·shuffleInt32x27to31bits+320+52(SB)/4, $0x06050403
DATA ·shuffleInt32x27to31bits+320+56(SB)/4, $0x0A090807
DATA ·shuffleInt32x27to31bits+320+60(SB)/4, $0x0E0D0C0B
DATA ·shuffleInt32x27to31bits+320+64(SB)/4, $0x03808080
DATA ·shuffleInt32x27to31bits+320+68(SB)/4, $0x07808080
DATA ·shuffleInt32x27to31bits+320+72(SB)/4, $0x0B808080
DATA ·shuffleInt32x27to31bits+320+76(SB)/4, $0x80808080

// The RIGHT shifts to unpack 32 bits integers.
//
// The following formula was determined empirically as the expression which
// generates the shift values (the unpack kernels index this table with the
// byte offset (32 * (bitWidth - 1)) % 256):
//
//	shift[i] = (i * bitWidth) % 8
//
GLOBL ·shiftRightInt32(SB), RODATA|NOPTR, $256

DATA ·shiftRightInt32+0+0(SB)/4, $0
DATA ·shiftRightInt32+0+4(SB)/4, $1
DATA ·shiftRightInt32+0+8(SB)/4, $2
DATA ·shiftRightInt32+0+12(SB)/4, $3
DATA ·shiftRightInt32+0+16(SB)/4, $4
DATA ·shiftRightInt32+0+20(SB)/4, $5
DATA ·shiftRightInt32+0+24(SB)/4, $6
DATA ·shiftRightInt32+0+28(SB)/4, $7

DATA ·shiftRightInt32+32+0(SB)/4, $0
DATA ·shiftRightInt32+32+4(SB)/4, $2
DATA ·shiftRightInt32+32+8(SB)/4, $4
DATA ·shiftRightInt32+32+12(SB)/4, $6
DATA ·shiftRightInt32+32+16(SB)/4, $0
DATA ·shiftRightInt32+32+20(SB)/4, $2
DATA ·shiftRightInt32+32+24(SB)/4, $4
DATA ·shiftRightInt32+32+28(SB)/4, $6

DATA ·shiftRightInt32+64+0(SB)/4, $0
DATA ·shiftRightInt32+64+4(SB)/4, $3
DATA ·shiftRightInt32+64+8(SB)/4, $6
DATA ·shiftRightInt32+64+12(SB)/4, $1
DATA ·shiftRightInt32+64+16(SB)/4, $4
DATA ·shiftRightInt32+64+20(SB)/4, $7
DATA ·shiftRightInt32+64+24(SB)/4, $2
DATA ·shiftRightInt32+64+28(SB)/4, $5

DATA ·shiftRightInt32+96+0(SB)/4, $0
DATA ·shiftRightInt32+96+4(SB)/4, $4
DATA ·shiftRightInt32+96+8(SB)/4, $0
DATA ·shiftRightInt32+96+12(SB)/4, $4
DATA ·shiftRightInt32+96+16(SB)/4, $0
DATA ·shiftRightInt32+96+20(SB)/4, $4
DATA ·shiftRightInt32+96+24(SB)/4, $0
DATA ·shiftRightInt32+96+28(SB)/4, $4

DATA ·shiftRightInt32+128+0(SB)/4, $0
DATA ·shiftRightInt32+128+4(SB)/4, $5
DATA ·shiftRightInt32+128+8(SB)/4, $2
DATA ·shiftRightInt32+128+12(SB)/4, $7
DATA ·shiftRightInt32+128+16(SB)/4, $4
DATA ·shiftRightInt32+128+20(SB)/4, $1
DATA ·shiftRightInt32+128+24(SB)/4, $6
DATA ·shiftRightInt32+128+28(SB)/4, $3

DATA ·shiftRightInt32+160+0(SB)/4, $0
DATA ·shiftRightInt32+160+4(SB)/4, $6
DATA ·shiftRightInt32+160+8(SB)/4, $4
DATA ·shiftRightInt32+160+12(SB)/4, $2
DATA ·shiftRightInt32+160+16(SB)/4, $0
DATA ·shiftRightInt32+160+20(SB)/4, $6
DATA ·shiftRightInt32+160+24(SB)/4, $4
DATA ·shiftRightInt32+160+28(SB)/4, $2

DATA ·shiftRightInt32+192+0(SB)/4, $0
DATA ·shiftRightInt32+192+4(SB)/4, $7
DATA ·shiftRightInt32+192+8(SB)/4, $6
DATA ·shiftRightInt32+192+12(SB)/4, $5
DATA ·shiftRightInt32+192+16(SB)/4, $4
DATA ·shiftRightInt32+192+20(SB)/4, $3
DATA ·shiftRightInt32+192+24(SB)/4, $2
DATA ·shiftRightInt32+192+28(SB)/4, $1

DATA ·shiftRightInt32+224+0(SB)/4, $0
DATA ·shiftRightInt32+224+4(SB)/4, $0
DATA ·shiftRightInt32+224+8(SB)/4, $0
DATA ·shiftRightInt32+224+12(SB)/4, $0
DATA ·shiftRightInt32+224+16(SB)/4, $0
DATA ·shiftRightInt32+224+20(SB)/4, $0
DATA ·shiftRightInt32+224+24(SB)/4, $0
DATA ·shiftRightInt32+224+28(SB)/4, $0

// The LEFT shifts to unpack 32 bits integers.
//
// The following formula was determined empirically as the expression which
// generates the shift values:
//
//	shift[i] = (8 - (i * bitWidth)) % 8
//
GLOBL ·shiftLeftInt32(SB), RODATA|NOPTR, $256

DATA ·shiftLeftInt32+0+0(SB)/4, $0
DATA ·shiftLeftInt32+0+4(SB)/4, $7
DATA ·shiftLeftInt32+0+8(SB)/4, $6
DATA ·shiftLeftInt32+0+12(SB)/4, $5
DATA ·shiftLeftInt32+0+16(SB)/4, $4
DATA ·shiftLeftInt32+0+20(SB)/4, $3
DATA ·shiftLeftInt32+0+24(SB)/4, $2
DATA ·shiftLeftInt32+0+28(SB)/4, $1

DATA ·shiftLeftInt32+32+0(SB)/4, $0
DATA ·shiftLeftInt32+32+4(SB)/4, $6
DATA ·shiftLeftInt32+32+8(SB)/4, $4
DATA ·shiftLeftInt32+32+12(SB)/4, $2
DATA ·shiftLeftInt32+32+16(SB)/4, $0
DATA ·shiftLeftInt32+32+20(SB)/4, $6
DATA ·shiftLeftInt32+32+24(SB)/4, $4
DATA ·shiftLeftInt32+32+28(SB)/4, $2

DATA ·shiftLeftInt32+64+0(SB)/4, $0
DATA ·shiftLeftInt32+64+4(SB)/4, $5
DATA ·shiftLeftInt32+64+8(SB)/4, $2
DATA ·shiftLeftInt32+64+12(SB)/4, $7
DATA ·shiftLeftInt32+64+16(SB)/4, $4
DATA ·shiftLeftInt32+64+20(SB)/4, $1
DATA ·shiftLeftInt32+64+24(SB)/4, $6
DATA ·shiftLeftInt32+64+28(SB)/4, $3

DATA ·shiftLeftInt32+96+0(SB)/4, $0
DATA ·shiftLeftInt32+96+4(SB)/4, $4
DATA ·shiftLeftInt32+96+8(SB)/4, $0
DATA ·shiftLeftInt32+96+12(SB)/4, $4
DATA ·shiftLeftInt32+96+16(SB)/4, $0
DATA ·shiftLeftInt32+96+20(SB)/4, $4
DATA ·shiftLeftInt32+96+24(SB)/4, $0
DATA ·shiftLeftInt32+96+28(SB)/4, $4

DATA ·shiftLeftInt32+128+0(SB)/4, $0
DATA ·shiftLeftInt32+128+4(SB)/4, $3
DATA ·shiftLeftInt32+128+8(SB)/4, $6
DATA ·shiftLeftInt32+128+12(SB)/4, $1
DATA ·shiftLeftInt32+128+16(SB)/4, $4
DATA ·shiftLeftInt32+128+20(SB)/4, $7
DATA ·shiftLeftInt32+128+24(SB)/4, $2
DATA ·shiftLeftInt32+128+28(SB)/4, $5

DATA ·shiftLeftInt32+160+0(SB)/4, $0
DATA ·shiftLeftInt32+160+4(SB)/4, $2
DATA ·shiftLeftInt32+160+8(SB)/4, $4
DATA ·shiftLeftInt32+160+12(SB)/4, $6
DATA ·shiftLeftInt32+160+16(SB)/4, $0
DATA ·shiftLeftInt32+160+20(SB)/4, $2
DATA ·shiftLeftInt32+160+24(SB)/4, $4
DATA ·shiftLeftInt32+160+28(SB)/4, $6

DATA ·shiftLeftInt32+192+0(SB)/4, $0
DATA ·shiftLeftInt32+192+4(SB)/4, $1
DATA ·shiftLeftInt32+192+8(SB)/4, $2
DATA ·shiftLeftInt32+192+12(SB)/4, $3
DATA ·shiftLeftInt32+192+16(SB)/4, $4
DATA ·shiftLeftInt32+192+20(SB)/4, $5
DATA ·shiftLeftInt32+192+24(SB)/4, $6
DATA ·shiftLeftInt32+192+28(SB)/4, $7

DATA ·shiftLeftInt32+224+0(SB)/4, $0
DATA ·shiftLeftInt32+224+4(SB)/4, $0
DATA ·shiftLeftInt32+224+8(SB)/4, $0
DATA ·shiftLeftInt32+224+12(SB)/4, $0
DATA ·shiftLeftInt32+224+16(SB)/4, $0
DATA ·shiftLeftInt32+224+20(SB)/4, $0
DATA ·shiftLeftInt32+224+24(SB)/4, $0
DATA ·shiftLeftInt32+224+28(SB)/4, $0

================================================
FILE: internal/bitpack/pack.go
================================================
package bitpack

import (
	"encoding/binary"
)

// PackInt32 packs values from src to dst, each value is packed into the given
// bit width regardless of how many bits are needed to represent it.
//
// The function panics if dst is too short to hold the bit packed values.
func PackInt32(dst []byte, src []int32, bitWidth uint) { assertPack(dst, len(src), bitWidth) packInt32(dst, src, bitWidth) } func packInt32(dst []byte, src []int32, bitWidth uint) { n := ByteCount(uint(len(src)) * bitWidth) b := dst[:n] for i := range b { b[i] = 0 } bitMask := uint32(1<> (32 - j)) binary.LittleEndian.PutUint32(dst[(i+0)*4:], lo) binary.LittleEndian.PutUint32(dst[(i+1)*4:], hi) bitOffset += bitWidth } } // PackInt64 packs values from src to dst, each value is packed into the given // bit width regardless of how many bits are needed to represent it. // // The function panics if dst is too short to hold the bit packed values. func PackInt64(dst []byte, src []int64, bitWidth uint) { assertPack(dst, len(src), bitWidth) packInt64(dst, src, bitWidth) } func packInt64(dst []byte, src []int64, bitWidth uint) { n := ByteCount(uint(len(src)) * bitWidth) b := dst[:n] for i := range b { b[i] = 0 } bitMask := uint64(1<> (64 - j)) binary.LittleEndian.PutUint64(dst[(i+0)*8:], lo) binary.LittleEndian.PutUint64(dst[(i+1)*8:], hi) bitOffset += bitWidth } } func assertPack(dst []byte, count int, bitWidth uint) { _ = dst[:ByteCount(bitWidth*uint(count))] } ================================================ FILE: internal/bitpack/unpack.go ================================================ package bitpack // PaddingInt32 is the padding expected to exist after the end of input buffers // for the UnpackInt32 algorithm to avoid reading beyond the end of the input. const PaddingInt32 = 16 // PaddingInt64 is the padding expected to exist after the end of input buffers // for the UnpackInt32 algorithm to avoid reading beyond the end of the input. const PaddingInt64 = 32 // UnpackInt32 unpacks 32 bit integers from src to dst. // // The function unpacked len(dst) integers, it panics if src is too short to // contain len(dst) values of the given bit width. 
func UnpackInt32(dst []int32, src []byte, bitWidth uint) { _ = src[:ByteCount(bitWidth*uint(len(dst))+8*PaddingInt32)] unpackInt32(dst, src, bitWidth) } // UnpackInt64 unpacks 64 bit integers from src to dst. // // The function unpacked len(dst) integers, it panics if src is too short to // contain len(dst) values of the given bit width. func UnpackInt64(dst []int64, src []byte, bitWidth uint) { _ = src[:ByteCount(bitWidth*uint(len(dst))+8*PaddingInt64)] unpackInt64(dst, src, bitWidth) } ================================================ FILE: internal/bitpack/unpack_int32_amd64.go ================================================ //go:build !purego package bitpack import ( "github.com/segmentio/parquet-go/internal/unsafecast" "golang.org/x/sys/cpu" ) //go:noescape func unpackInt32Default(dst []int32, src []byte, bitWidth uint) //go:noescape func unpackInt32x1to16bitsAVX2(dst []int32, src []byte, bitWidth uint) //go:noescape func unpackInt32x17to26bitsAVX2(dst []int32, src []byte, bitWidth uint) //go:noescape func unpackInt32x27to31bitsAVX2(dst []int32, src []byte, bitWidth uint) func unpackInt32(dst []int32, src []byte, bitWidth uint) { hasAVX2 := cpu.X86.HasAVX2 switch { case hasAVX2 && bitWidth <= 16: unpackInt32x1to16bitsAVX2(dst, src, bitWidth) case hasAVX2 && bitWidth <= 26: unpackInt32x17to26bitsAVX2(dst, src, bitWidth) case hasAVX2 && bitWidth <= 31: unpackInt32x27to31bitsAVX2(dst, src, bitWidth) case bitWidth == 32: copy(dst, unsafecast.BytesToInt32(src)) default: unpackInt32Default(dst, src, bitWidth) } } ================================================ FILE: internal/bitpack/unpack_int32_amd64.s ================================================ //go:build !purego #include "funcdata.h" #include "textflag.h" // func unpackInt32Default(dst []int32, src []byte, bitWidth uint) TEXT ·unpackInt32Default(SB), NOSPLIT, $0-56 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), DX MOVQ src_base+24(FP), BX MOVQ bitWidth+48(FP), CX MOVQ $1, R8 // bitMask = (1 << bitWidth) - 1 
SHLQ CX, R8 DECQ R8 MOVQ CX, R9 // bitWidth XORQ DI, DI // bitOffset XORQ SI, SI // index JMP test loop: MOVQ DI, R10 MOVQ DI, CX SHRQ $5, R10 // i = bitOffset / 32 ANDQ $0b11111, CX // j = bitOffset % 32 MOVL (BX)(R10*4), R11 MOVL R8, R12 // d = bitMask SHLL CX, R12 // d = d << j ANDL R12, R11 // d = src[i] & d SHRL CX, R11 // d = d >> j MOVL CX, R13 ADDL R9, R13 CMPL R13, $32 JBE next // j+bitWidth <= 32 ? MOVL 4(BX)(R10*4), R14 MOVL CX, R12 MOVL $32, CX SUBL R12, CX // k = 32 - j MOVL R8, R12 // c = bitMask SHRL CX, R12 // c = c >> k ANDL R12, R14 // c = src[i+1] & c SHLL CX, R14 // c = c << k ORL R14, R11 // d = d | c next: MOVL R11, (AX)(SI*4) // dst[n] = d ADDQ R9, DI // bitOffset += bitWidth INCQ SI test: CMPQ SI, DX JNE loop RET // ----------------------------------------------------------------------------- // The unpack* functions below are adaptations of the algorithms // described in "Decoding billions of integers per second through vectorization" // from D. Lemire & L. Boytsov, the following changes were made: // // - The paper described two methods for decoding integers called "horizontal" // and "vertical". The "horizontal" version is the one that applies the best // to the bit packing done in the Parquet delta encoding; however, it also // differs in some ways, many compression techniques discussed in the paper // are not implemented in the Parquet format. // // - The paper focuses on implementations based on SSE instructions, which // describes how to use PMULLD to emulate the lack of variable bit shift // for packed integers. Our version of the bit unpacking algorithms here // uses AVX2 and can perform variable bit shifts using VPSRLVD, which yields // better throughput since the instruction latency is a single CPU cycle, // vs 10 for VPMULLD. // // - The reference implementation at https://github.com/lemire/FastPFor/ uses // specializations for each bit size, resulting in 32 unique functions. 
// Our version here are more generic, we provide 3 variations of the // algorithm for bit widths 1 to 16, 17 to 26, and 27 to 31 (unpacking 32 // bits values is a simple copy). In that regard, our implementation is // somewhat an improvement over the reference, since it uses less code and // less memory to hold the shuffle masks and shift tables. // // Technically, each specialization of our functions could be expressed by the // algorithm used for unpacking values of 27 to 31 bits. However, multiple steps // of the main loop can be removed for lower bit widths, providing up to ~35% // better throughput for smaller sizes. Since we expect delta encoding to often // result in bit packing values to smaller bit widths, the specializations are // worth the extra complexity. // // For more details, see: https://arxiv.org/pdf/1209.2137v5.pdf // ----------------------------------------------------------------------------- // unpackInt32x1to16bitsAVX2 is the implementation of the bit unpacking // algorithm for inputs of bit width 1 to 16. // // In this version of the algorithm, we can perform a single memory load in each // loop iteration since we know that 8 values will fit in a single XMM register. 
// // func unpackInt32x1to16bitsAVX2(dst []int32, src []byte, bitWidth uint) TEXT ·unpackInt32x1to16bitsAVX2(SB), NOSPLIT, $56-56 NO_LOCAL_POINTERS MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), DX MOVQ src_base+24(FP), BX MOVQ bitWidth+48(FP), CX CMPQ DX, $8 JB tail MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI XORQ SI, SI MOVQ $1, R8 SHLQ CX, R8 DECQ R8 MOVQ R8, X0 VPBROADCASTD X0, X0 // bitMask = (1 << bitWidth) - 1 MOVQ CX, R9 DECQ R9 SHLQ $5, R9 // 32 * (bitWidth - 1) MOVQ CX, R10 DECQ R10 SHLQ $5, R10 ANDQ $0xFF, R10 // (32 * (bitWidth - 1)) % 256 LEAQ ·shuffleInt32x1to16bits(SB), R11 VMOVDQA (R11)(R9*1), X1 VMOVDQA 16(R11)(R9*1), X2 LEAQ ·shiftRightInt32(SB), R12 VMOVDQA (R12)(R10*1), X3 VMOVDQA 16(R12)(R10*1), X4 loop: VMOVDQU (BX), X7 VPSHUFB X1, X7, X5 VPSHUFB X2, X7, X6 VPSRLVD X3, X5, X5 VPSRLVD X4, X6, X6 VPAND X0, X5, X5 VPAND X0, X6, X6 VMOVDQU X5, (AX)(SI*4) VMOVDQU X6, 16(AX)(SI*4) ADDQ CX, BX ADDQ $8, SI CMPQ SI, DI JNE loop VZEROUPPER CMPQ SI, DX JE done LEAQ (AX)(SI*4), AX SUBQ SI, DX tail: MOVQ AX, dst_base-56(SP) MOVQ DX, dst_len-48(SP) MOVQ BX, src_base-32(SP) MOVQ CX, bitWidth-8(SP) CALL ·unpackInt32Default(SB) done: RET // unpackInt32x17to26bitsAVX2 is the implementation of the bit unpacking // algorithm for inputs of bit width 17 to 26. // // In this version of the algorithm, we need to 32 bytes at each loop iteration // because 8 bit-packed values will span across two XMM registers. 
// // func unpackInt32x17to26bitsAVX2(dst []int32, src []byte, bitWidth uint) TEXT ·unpackInt32x17to26bitsAVX2(SB), NOSPLIT, $56-56 NO_LOCAL_POINTERS MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), DX MOVQ src_base+24(FP), BX MOVQ bitWidth+48(FP), CX CMPQ DX, $8 JB tail MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI XORQ SI, SI MOVQ $1, R8 SHLQ CX, R8 DECQ R8 MOVQ R8, X0 VPBROADCASTD X0, X0 MOVQ CX, R9 SUBQ $17, R9 IMULQ $48, R9 // 48 * (bitWidth - 17) MOVQ CX, R10 DECQ R10 SHLQ $5, R10 ANDQ $0xFF, R10 // (32 * (bitWidth - 1)) % 256 LEAQ ·shuffleInt32x17to26bits(SB), R11 VMOVDQA (R11)(R9*1), X1 VMOVDQA 16(R11)(R9*1), X2 VMOVDQA 32(R11)(R9*1), X3 LEAQ ·shiftRightInt32(SB), R12 VMOVDQA (R12)(R10*1), X4 VMOVDQA 16(R12)(R10*1), X5 loop: VMOVDQU (BX), X6 VMOVDQU 16(BX), X7 VPSHUFB X1, X6, X8 VPSHUFB X2, X6, X9 VPSHUFB X3, X7, X10 VPOR X10, X9, X9 VPSRLVD X4, X8, X8 VPSRLVD X5, X9, X9 VPAND X0, X8, X8 VPAND X0, X9, X9 VMOVDQU X8, (AX)(SI*4) VMOVDQU X9, 16(AX)(SI*4) ADDQ CX, BX ADDQ $8, SI CMPQ SI, DI JNE loop VZEROUPPER CMPQ SI, DX JE done LEAQ (AX)(SI*4), AX SUBQ SI, DX tail: MOVQ AX, dst_base-56(SP) MOVQ DX, dst_len-48(SP) MOVQ BX, src_base-32(SP) MOVQ CX, bitWidth-8(SP) CALL ·unpackInt32Default(SB) done: RET // unpackInt32x27to31bitsAVX2 is the implementation of the bit unpacking // algorithm for inputs of bit width 27 to 31. // // In this version of the algorithm the bit-packed values may span across up to // 5 bytes. The simpler approach for smaller bit widths where we could perform a // single shuffle + shift to unpack the values do not work anymore. // // Values are unpacked in two steps: the first one extracts lower bits which are // shifted RIGHT to align on the beginning of 32 bit words, the second extracts // upper bits which are shifted LEFT to be moved to the end of the 32 bit words. // // The amount of LEFT shifts is always "8 minus the amount of RIGHT shift". 
// // func unpackInt32x27to31bitsAVX2(dst []int32, src []byte, bitWidth uint) TEXT ·unpackInt32x27to31bitsAVX2(SB), NOSPLIT, $56-56 NO_LOCAL_POINTERS MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), DX MOVQ src_base+24(FP), BX MOVQ bitWidth+48(FP), CX CMPQ DX, $8 JB tail MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI XORQ SI, SI MOVQ $1, R8 SHLQ CX, R8 DECQ R8 MOVQ R8, X0 VPBROADCASTD X0, X0 MOVQ CX, R9 SUBQ $27, R9 IMULQ $80, R9 // (80 * (bitWidth - 27)) MOVQ CX, R10 DECQ R10 SHLQ $5, R10 ANDQ $0xFF, R10 // (32 * (bitWidth - 1)) % 256 LEAQ ·shuffleInt32x27to31bits(SB), R11 VMOVDQA (R11)(R9*1), X1 VMOVDQA 16(R11)(R9*1), X2 VMOVDQA 32(R11)(R9*1), X3 VMOVDQA 48(R11)(R9*1), X4 VMOVDQA 64(R11)(R9*1), X5 LEAQ ·shiftRightInt32(SB), R12 LEAQ ·shiftLeftInt32(SB), R13 VMOVDQA (R12)(R10*1), X6 VMOVDQA (R13)(R10*1), X7 VMOVDQA 16(R12)(R10*1), X8 VMOVDQA 16(R13)(R10*1), X9 loop: VMOVDQU (BX), X10 VMOVDQU 16(BX), X11 VPSHUFB X1, X10, X12 VPSHUFB X2, X10, X13 VPSHUFB X3, X10, X14 VPSHUFB X4, X11, X15 VPSHUFB X5, X11, X11 VPSRLVD X6, X12, X12 VPSLLVD X7, X13, X13 VPSRLVD X8, X14, X14 VPSRLVD X8, X15, X15 VPSLLVD X9, X11, X11 VPOR X13, X12, X12 VPOR X15, X14, X14 VPOR X11, X14, X14 VPAND X0, X12, X12 VPAND X0, X14, X14 VMOVDQU X12, (AX)(SI*4) VMOVDQU X14, 16(AX)(SI*4) ADDQ CX, BX ADDQ $8, SI CMPQ SI, DI JNE loop VZEROUPPER CMPQ SI, DX JE done LEAQ (AX)(SI*4), AX SUBQ SI, DX tail: MOVQ AX, dst_base-56(SP) MOVQ DX, dst_len-48(SP) MOVQ BX, src_base-32(SP) MOVQ CX, bitWidth-8(SP) CALL ·unpackInt32Default(SB) done: RET ================================================ FILE: internal/bitpack/unpack_int32_purego.go ================================================ //go:build purego || !amd64 package bitpack import ( "github.com/segmentio/parquet-go/internal/unsafecast" ) func unpackInt32(dst []int32, src []byte, bitWidth uint) { bits := unsafecast.BytesToUint32(src) bitMask := uint32(1<> j if j+bitWidth > 32 { k := 32 - j d |= (bits[i+1] & (bitMask >> k)) << k } dst[n] = int32(d) bitOffset += bitWidth } } 
================================================ FILE: internal/bitpack/unpack_int64_amd64.go ================================================ //go:build !purego package bitpack import ( "github.com/segmentio/parquet-go/internal/unsafecast" "golang.org/x/sys/cpu" ) //go:noescape func unpackInt64Default(dst []int64, src []byte, bitWidth uint) //go:noescape func unpackInt64x1to32bitsAVX2(dst []int64, src []byte, bitWidth uint) func unpackInt64(dst []int64, src []byte, bitWidth uint) { hasAVX2 := cpu.X86.HasAVX2 switch { case hasAVX2 && bitWidth <= 32: unpackInt64x1to32bitsAVX2(dst, src, bitWidth) case bitWidth == 64: copy(dst, unsafecast.BytesToInt64(src)) default: unpackInt64Default(dst, src, bitWidth) } } ================================================ FILE: internal/bitpack/unpack_int64_amd64.s ================================================ //go:build !purego #include "funcdata.h" #include "textflag.h" // func unpackInt64Default(dst []int64, src []uint32, bitWidth uint) TEXT ·unpackInt64Default(SB), NOSPLIT, $0-56 MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), DX MOVQ src_base+24(FP), BX MOVQ bitWidth+48(FP), CX MOVQ $1, R8 // bitMask = (1 << bitWidth) - 1 SHLQ CX, R8, R8 DECQ R8 MOVQ CX, R9 // bitWidth XORQ DI, DI // bitOffset XORQ SI, SI // index XORQ R10, R10 XORQ R11, R11 XORQ R14, R14 JMP test loop: MOVQ DI, R10 MOVQ DI, CX SHRQ $5, R10 // i = bitOffset / 32 ANDQ $0b11111, CX // j = bitOffset % 32 MOVLQZX (BX)(R10*4), R11 MOVQ R8, R12 // d = bitMask SHLQ CX, R12 // d = d << j ANDQ R12, R11 // d = src[i] & d SHRQ CX, R11 // d = d >> j MOVQ CX, R13 ADDQ R9, R13 CMPQ R13, $32 JBE next // j+bitWidth <= 32 ? 
MOVQ CX, R15 // j MOVLQZX 4(BX)(R10*4), R14 MOVQ $32, CX SUBQ R15, CX // k = 32 - j MOVQ R8, R12 // c = bitMask SHRQ CX, R12 // c = c >> k ANDQ R12, R14 // c = src[i+1] & c SHLQ CX, R14 // c = c << k ORQ R14, R11 // d = d | c CMPQ R13, $64 JBE next MOVLQZX 8(BX)(R10*4), R14 MOVQ $64, CX SUBQ R15, CX // k = 64 - j MOVQ R8, R12 // c = bitMask SHRQ CX, R12 // c = c >> k ANDQ R12, R14 // c = src[i+2] & c SHLQ CX, R14 // c = c << k ORQ R14, R11 // d = d | c next: MOVQ R11, (AX)(SI*8) // dst[n] = d ADDQ R9, DI // bitOffset += bitWidth INCQ SI test: CMPQ SI, DX JNE loop RET // This bit unpacking function was inspired from the 32 bit version, but // adapted to account for the fact that eight 64 bit values span across // two YMM registers, and across lanes of YMM registers. // // Because of the two lanes of YMM registers, we cannot use the VPSHUFB // instruction to dispatch bytes of the input to the registers. Instead we use // the VPERMD instruction, which has higher latency but supports dispatching // bytes across register lanes. Measurable throughput gains remain despite the // algorithm running on a few more CPU cycles per loop. // // The initialization phase of this algorithm generates masks for // permutations and shifts used to decode the bit-packed values. // // The permutation masks are written to Y7 and Y8, and contain the results // of this formula: // // temp[i] = (bitWidth * i) / 32 // mask[i] = temp[i] | ((temp[i] + 1) << 32) // // Since VPERMQ only supports reading the permutation combination from an // immediate value, we use VPERMD and generate permutation for pairs of two // consecutive 32 bit words, which is why we have the upper part of each 64 // bit word set with (x+1)<<32. 
// // The masks for right shifts are written to Y5 and Y6, and computed with // this formula: // // shift[i] = (bitWidth * i) - (32 * ((bitWidth * i) / 32)) // // The amount to shift by is the number of values previously unpacked, offseted // by the byte count of 32 bit words that we read from first bits from. // // Technically the masks could be precomputed and declared in global tables; // however, declaring masks for all bit width is tedious and makes code // maintenance more costly for no measurable benefits on production workloads. // // func unpackInt64x1to32bitsAVX2(dst []int64, src []byte, bitWidth uint) TEXT ·unpackInt64x1to32bitsAVX2(SB), NOSPLIT, $56-56 NO_LOCAL_POINTERS MOVQ dst_base+0(FP), AX MOVQ dst_len+8(FP), DX MOVQ src_base+24(FP), BX MOVQ bitWidth+48(FP), CX CMPQ DX, $8 JB tail MOVQ DX, DI SHRQ $3, DI SHLQ $3, DI XORQ SI, SI MOVQ $1, R8 SHLQ CX, R8 DECQ R8 MOVQ R8, X0 VPBROADCASTQ X0, Y0 // bitMask = (1 << bitWidth) - 1 VPCMPEQQ Y1, Y1, Y1 VPSRLQ $63, Y1, Y1 // [1,1,1,1] MOVQ CX, X2 VPBROADCASTQ X2, Y2 // [bitWidth] VMOVDQU range0n7<>+0(SB), Y3 // [0,1,2,3] VMOVDQU range0n7<>+32(SB), Y4 // [4,5,6,7] VPMULLD Y2, Y3, Y5 // [bitWidth] * [0,1,2,3] VPMULLD Y2, Y4, Y6 // [bitWidth] * [4,5,6,7] VPSRLQ $5, Y5, Y7 // ([bitWidth] * [0,1,2,3]) / 32 VPSRLQ $5, Y6, Y8 // ([bitWidth] * [4,5,6,7]) / 32 VPSLLQ $5, Y7, Y9 // (([bitWidth] * [0,1,2,3]) / 32) * 32 VPSLLQ $5, Y8, Y10 // (([bitWidth] * [4,5,6,7]) / 32) * 32 VPADDQ Y1, Y7, Y11 VPADDQ Y1, Y8, Y12 VPSLLQ $32, Y11, Y11 VPSLLQ $32, Y12, Y12 VPOR Y11, Y7, Y7 // permutations[i] = [i | ((i + 1) << 32)] VPOR Y12, Y8, Y8 // permutations[i] = [i | ((i + 1) << 32)] VPSUBQ Y9, Y5, Y5 // shifts VPSUBQ Y10, Y6, Y6 loop: VMOVDQU (BX), Y1 VPERMD Y1, Y7, Y2 VPERMD Y1, Y8, Y3 VPSRLVQ Y5, Y2, Y2 VPSRLVQ Y6, Y3, Y3 VPAND Y0, Y2, Y2 VPAND Y0, Y3, Y3 VMOVDQU Y2, (AX)(SI*8) VMOVDQU Y3, 32(AX)(SI*8) ADDQ CX, BX ADDQ $8, SI CMPQ SI, DI JNE loop VZEROUPPER CMPQ SI, DX JE done LEAQ (AX)(SI*8), AX SUBQ SI, DX tail: MOVQ AX, 
dst_base-56(SP) MOVQ DX, dst_len-48(SP) MOVQ BX, src_base-32(SP) MOVQ CX, bitWidth-8(SP) CALL ·unpackInt64Default(SB) done: RET GLOBL range0n7<>(SB), RODATA|NOPTR, $64 DATA range0n7<>+0(SB)/8, $0 DATA range0n7<>+8(SB)/8, $1 DATA range0n7<>+16(SB)/8, $2 DATA range0n7<>+24(SB)/8, $3 DATA range0n7<>+32(SB)/8, $4 DATA range0n7<>+40(SB)/8, $5 DATA range0n7<>+48(SB)/8, $6 DATA range0n7<>+56(SB)/8, $7 ================================================ FILE: internal/bitpack/unpack_int64_purego.go ================================================ //go:build purego || !amd64 package bitpack import "github.com/segmentio/parquet-go/internal/unsafecast" func unpackInt64(dst []int64, src []byte, bitWidth uint) { bits := unsafecast.BytesToUint32(src) bitMask := uint64(1<> j if j+bitWidth > 32 { k := 32 - j d |= (uint64(bits[i+1]) & (bitMask >> k)) << k if j+bitWidth > 64 { k := 64 - j d |= (uint64(bits[i+2]) & (bitMask >> k)) << k } } dst[n] = int64(d) bitOffset += bitWidth } } ================================================ FILE: internal/bitpack/unpack_test.go ================================================ package bitpack_test import ( "fmt" "math/rand" "reflect" "testing" "github.com/segmentio/parquet-go/internal/bitpack" ) const ( blockSize = 128 ) func TestUnpackInt32(t *testing.T) { for bitWidth := uint(1); bitWidth <= 32; bitWidth++ { t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { block := [blockSize]int32{} bitMask := int32(bitWidth<<1) - 1 prng := rand.New(rand.NewSource(0)) for i := range block { block[i] = prng.Int31() & bitMask } size := (blockSize * bitWidth) / 8 buf := make([]byte, size+bitpack.PaddingInt32) bitpack.PackInt32(buf, block[:], bitWidth) src := buf[:size] dst := make([]int32, blockSize) for n := 1; n <= blockSize; n++ { for i := range dst { dst[i] = 0 } bitpack.UnpackInt32(dst[:n], src, bitWidth) if !reflect.DeepEqual(block[:n], dst[:n]) { t.Fatalf("values mismatch for length=%d\nwant: %v\ngot: %v", n, block[:n], dst[:n]) } } }) } } 
func TestUnpackInt64(t *testing.T) { for bitWidth := uint(1); bitWidth <= 63; bitWidth++ { t.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(t *testing.T) { block := [blockSize]int64{} bitMask := int64(bitWidth<<1) - 1 prng := rand.New(rand.NewSource(0)) for i := range block { block[i] = prng.Int63() & bitMask } size := (blockSize * bitWidth) / 8 buf := make([]byte, size+bitpack.PaddingInt64) bitpack.PackInt64(buf, block[:], bitWidth) src := buf[:size] dst := make([]int64, blockSize) for n := 1; n <= blockSize; n++ { for i := range dst { dst[i] = 0 } bitpack.UnpackInt64(dst[:n], src, bitWidth) if !reflect.DeepEqual(block[:n], dst[:n]) { t.Fatalf("values mismatch for length=%d\nwant: %v\ngot: %v", n, block[:n], dst[:n]) } } }) } } func BenchmarkUnpackInt32(b *testing.B) { for bitWidth := uint(1); bitWidth <= 32; bitWidth++ { block := [blockSize]int32{} buf := [4*blockSize + bitpack.PaddingInt32]byte{} bitpack.PackInt32(buf[:], block[:], bitWidth) b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) { dst := block[:] src := buf[:] for i := 0; i < b.N; i++ { bitpack.UnpackInt32(dst, src, bitWidth) } b.SetBytes(4 * blockSize) }) } } func BenchmarkUnpackInt64(b *testing.B) { for bitWidth := uint(1); bitWidth <= 64; bitWidth++ { block := [blockSize]int64{} buf := [8*blockSize + bitpack.PaddingInt64]byte{} bitpack.PackInt64(buf[:], block[:], bitWidth) b.Run(fmt.Sprintf("bitWidth=%d", bitWidth), func(b *testing.B) { dst := block[:] src := buf[:] for i := 0; i < b.N; i++ { bitpack.UnpackInt64(dst, src, bitWidth) } b.SetBytes(4 * blockSize) }) } } ================================================ FILE: internal/bytealg/broadcast_amd64.go ================================================ //go:build !purego package bytealg //go:noescape func broadcastAVX2(dst []byte, src byte) // Broadcast writes the src value to all bytes of dst. 
func Broadcast(dst []byte, src byte) {
	// The vectorized version needs at least 8 bytes to write whole quadwords.
	if len(dst) >= 8 && hasAVX2 {
		broadcastAVX2(dst, src)
	} else {
		for i := range dst {
			dst[i] = src
		}
	}
}

================================================ FILE: internal/bytealg/broadcast_amd64.s ================================================
//go:build !purego

#include "textflag.h"

// func broadcastAVX2(dst []byte, src byte)
//
// Fills dst with src using 64 byte YMM stores when len(dst) >= 64, 8 byte
// stores when 8 < len(dst) < 64, and single byte stores otherwise. The wide
// paths deliberately emit overlapping trailing stores instead of a scalar
// tail loop.
TEXT ·broadcastAVX2(SB), NOSPLIT, $0-25
	MOVQ dst_base+0(FP), AX
	MOVQ dst_len+8(FP), BX
	MOVBQZX src+24(FP), CX
	CMPQ BX, $8
	JBE test
	CMPQ BX, $64
	JB init8
	XORQ SI, SI
	MOVQ BX, DX
	SHRQ $6, DX
	SHLQ $6, DX // round the length down to a multiple of 64
	MOVQ CX, X0
	VPBROADCASTB X0, Y0
loop64:
	VMOVDQU Y0, (AX)(SI*1)
	VMOVDQU Y0, 32(AX)(SI*1)
	ADDQ $64, SI
	CMPQ SI, DX
	JNE loop64
	// Cover the tail with two (possibly overlapping) 32 byte stores.
	VMOVDQU Y0, -64(AX)(BX*1)
	VMOVDQU Y0, -32(AX)(BX*1)
	VZEROUPPER
	RET
init8:
	// Replicate the byte into all 8 bytes of CX.
	MOVQ $0x0101010101010101, R8
	IMULQ R8, CX
loop8:
	MOVQ CX, -8(AX)(BX*1)
	SUBQ $8, BX
	CMPQ BX, $8
	JAE loop8
	MOVQ CX, (AX) // overlapping head store covers the remainder
	RET
loop:
	MOVB CX, -1(AX)(BX*1)
	DECQ BX
test:
	CMPQ BX, $0
	JNE loop
	RET

================================================ FILE: internal/bytealg/broadcast_purego.go ================================================
//go:build purego || !amd64

package bytealg

func Broadcast(dst []byte, src byte) {
	for i := range dst {
		dst[i] = src
	}
}

================================================ FILE: internal/bytealg/broadcast_test.go ================================================
package bytealg_test

import (
	"fmt"
	"testing"

	"github.com/segmentio/parquet-go/internal/bytealg"
)

func TestBroadcast(t *testing.T) {
	const N = 100_0000
	buffer := make([]byte, N)
	// n = 2n+1 visits odd and even-ish sizes across all code paths.
	for n := 1; n <= N; n = (n * 2) + 1 {
		t.Run(fmt.Sprintf("size=%d", n), func(t *testing.T) {
			b := buffer[:n]
			for i := range b {
				b[i] = 0
			}
			bytealg.Broadcast(b, 42)
			for i, c := range b {
				if c != 42 {
					t.Fatalf("byte at index %d has value %d", i, c)
				}
			}
		})
	}
}

func BenchmarkBroadcast(b *testing.B) {
	for _, size := range []int{0, 10, 100, 1000, 10_000} {
		b.Run(fmt.Sprintf("size=%d", size), func(b *testing.B) {
			data := make([]byte, size)
			for i := 0; i < b.N; i++ {
				bytealg.Broadcast(data, 1)
			}
			b.SetBytes(int64(size))
		})
	}
}

================================================ FILE: internal/bytealg/bytealg.go ================================================
// Package bytealg contains optimized algorithms operating on byte slices.
package bytealg

================================================ FILE: internal/bytealg/bytealg_amd64.go ================================================
//go:build !purego

package bytealg

import "golang.org/x/sys/cpu"

var (
	hasAVX2 = cpu.X86.HasAVX2

	// The countByte algorithm relies on operations that are available in the
	// AVX512BW extension:
	// * VPCMPUB
	// * KMOVQ
	//
	// Note that the function will fallback to an AVX2 version if those
	// instructions are not available.
	hasAVX512Count = cpu.X86.HasAVX512VL && cpu.X86.HasAVX512BW
)

================================================ FILE: internal/bytealg/bytealg_test.go ================================================
package bytealg_test

import (
	"fmt"
	"testing"
)

var benchmarkBufferSizes = [...]int{
	4 * 1024,
	256 * 1024,
	2048 * 1024,
}

// forEachBenchmarkBufferSize runs f as a sub-benchmark once per buffer size,
// with b.SetBytes configured so results report throughput.
func forEachBenchmarkBufferSize(b *testing.B, f func(*testing.B, int)) {
	for _, bufferSize := range benchmarkBufferSizes {
		b.Run(fmt.Sprintf("%dKiB", bufferSize/1024), func(b *testing.B) {
			b.SetBytes(int64(bufferSize))
			f(b, bufferSize)
		})
	}
}

================================================ FILE: internal/bytealg/count_amd64.go ================================================
//go:build !purego

package bytealg

// This function is similar to using the standard bytes.Count function with a
// one-byte separator.
// However, the implementation makes use of AVX-512 when
// possible, which yields measurable throughput improvements:
//
//	name       old time/op   new time/op  delta
//	CountByte  82.5ns ± 0%   43.9ns ± 0%  -46.74%  (p=0.000 n=10+10)
//
//	name       old speed     new speed     delta
//	CountByte  49.6GB/s ± 0%  93.2GB/s ± 0%  +87.74%  (p=0.000 n=10+10)
//
// On systems that do not have AVX-512, the AVX2 version of the code is also
// optimized to make use of multiple register lanes, which gives a bit better
// throughput than the standard library function:
//
//	name       old time/op   new time/op  delta
//	CountByte  82.5ns ± 0%   61.0ns ± 0%  -26.04%  (p=0.000 n=10+10)
//
//	name       old speed     new speed     delta
//	CountByte  49.6GB/s ± 0%  67.1GB/s ± 0%  +35.21%  (p=0.000 n=10+10)
//
//go:noescape
func Count(data []byte, value byte) int

================================================ FILE: internal/bytealg/count_amd64.s ================================================
//go:build !purego

#include "textflag.h"

// func Count(data []byte, value byte) int
//
// Counts matching bytes 256 at a time with AVX-512 (compare masks + POPCNT),
// 64 at a time with AVX2, and falls through to a branchless scalar loop for
// the tail and for short inputs (< 256 bytes).
TEXT ·Count(SB), NOSPLIT, $0-40
	MOVQ data_base+0(FP), AX
	MOVQ data_len+8(FP), CX
	MOVB value+24(FP), BX

	MOVQ CX, DX // len
	ADDQ AX, CX // end
	XORQ SI, SI // count

	CMPQ DX, $256
	JB test
	CMPB ·hasAVX2(SB), $0
	JE test

	// Four independent accumulators reduce dependency chains in the loops.
	XORQ R12, R12
	XORQ R13, R13
	XORQ R14, R14
	XORQ R15, R15

	CMPB ·hasAVX512Count(SB), $0
	JE initAVX2

	SHRQ $8, DX
	SHLQ $8, DX // round the length down to a multiple of 256
	ADDQ AX, DX
	VPBROADCASTB BX, Z0
loopAVX512:
	VMOVDQU64 (AX), Z1
	VMOVDQU64 64(AX), Z2
	VMOVDQU64 128(AX), Z3
	VMOVDQU64 192(AX), Z4
	VPCMPUB $0, Z0, Z1, K1
	VPCMPUB $0, Z0, Z2, K2
	VPCMPUB $0, Z0, Z3, K3
	VPCMPUB $0, Z0, Z4, K4
	KMOVQ K1, R8
	KMOVQ K2, R9
	KMOVQ K3, R10
	KMOVQ K4, R11
	POPCNTQ R8, R8
	POPCNTQ R9, R9
	POPCNTQ R10, R10
	POPCNTQ R11, R11
	ADDQ R8, R12
	ADDQ R9, R13
	ADDQ R10, R14
	ADDQ R11, R15
	ADDQ $256, AX
	CMPQ AX, DX
	JNE loopAVX512
	ADDQ R12, R13
	ADDQ R14, R15
	ADDQ R13, SI
	ADDQ R15, SI
	JMP doneAVX
initAVX2:
	SHRQ $6, DX
	SHLQ $6, DX // round the length down to a multiple of 64
	ADDQ AX, DX
	VPBROADCASTB value+24(FP), Y0
loopAVX2:
	VMOVDQU (AX), Y1
	VMOVDQU 32(AX), Y2
	VPCMPEQB Y0, Y1, Y1
	VPCMPEQB Y0, Y2, Y2
	VPMOVMSKB Y1, R12
	VPMOVMSKB Y2, R13
	POPCNTL R12, R12
	POPCNTL R13, R13
	ADDQ R12, R14
	ADDQ R13, R15
	ADDQ $64, AX
	CMPQ AX, DX
	JNE loopAVX2
	ADDQ R14, SI
	ADDQ R15, SI
doneAVX:
	VZEROUPPER
	JMP test
loop:
	// Branchless scalar tail: conditionally take the incremented count.
	MOVQ SI, DI
	INCQ DI
	MOVB (AX), R8
	CMPB BX, R8
	CMOVQEQ DI, SI
	INCQ AX
test:
	CMPQ AX, CX
	JNE loop
done:
	MOVQ SI, ret+32(FP)
	RET

================================================ FILE: internal/bytealg/count_purego.go ================================================
//go:build purego || !amd64

package bytealg

import "bytes"

func Count(data []byte, value byte) int {
	return bytes.Count(data, []byte{value})
}

================================================ FILE: internal/bytealg/count_test.go ================================================
package bytealg_test

import (
	"bytes"
	"testing"

	"github.com/segmentio/parquet-go/internal/bytealg"
	"github.com/segmentio/parquet-go/internal/quick"
)

func TestCount(t *testing.T) {
	err := quick.Check(func(data []byte) bool {
		// Repeat to make inputs long enough to reach the vectorized paths.
		data = bytes.Repeat(data, 8)
		for _, c := range data {
			n1 := bytes.Count(data, []byte{c})
			n2 := bytealg.Count(data, c)
			if n1 != n2 {
				t.Errorf("got=%d want=%d", n2, n1)
				return false
			}
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
}

func BenchmarkCount(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		data := make([]byte, bufferSize)
		for i := range data {
			data[i] = byte(i)
		}
		for i := 0; i < b.N; i++ {
			bytealg.Count(data, 0)
		}
	})
}

================================================ FILE: internal/debug/debug.go ================================================
package debug

import (
	"encoding/hex"
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
	"strings"
)

// ReaderAt wraps reader so that every ReadAt call is printed to stdout with
// the given prefix, along with a hex dump of the bytes read.
func ReaderAt(reader io.ReaderAt, prefix string) io.ReaderAt {
	return &ioReaderAt{
		reader: reader,
		prefix: prefix,
	}
}

type ioReaderAt struct {
	reader io.ReaderAt
	prefix string
}

func (d *ioReaderAt) ReadAt(b []byte, off int64) (int, error) {
	n, err := d.reader.ReadAt(b, off)
	fmt.Printf("%s: Read(%d) @%d => %d %v \n%s\n", d.prefix,
		len(b), off, n, err, hex.Dump(b[:n]))
	return n, err
}

// Reader wraps reader so that every Read call is printed to stdout with the
// given prefix, the running byte offset, and a hex dump of the bytes read.
func Reader(reader io.Reader, prefix string) io.Reader {
	return &ioReader{
		reader: reader,
		prefix: prefix,
	}
}

type ioReader struct {
	reader io.Reader
	prefix string
	offset int64 // running count of bytes read, for the trace output
}

func (d *ioReader) Read(b []byte) (int, error) {
	n, err := d.reader.Read(b)
	fmt.Printf("%s: Read(%d) @%d => %d %v \n%s\n", d.prefix, len(b), d.offset, n, err, hex.Dump(b[:n]))
	d.offset += int64(n)
	return n, err
}

// Writer wraps writer so that every Write call is printed to stdout with the
// given prefix, the running byte offset, and the quoted bytes written.
func Writer(writer io.Writer, prefix string) io.Writer {
	return &ioWriter{
		writer: writer,
		prefix: prefix,
	}
}

type ioWriter struct {
	writer io.Writer
	prefix string
	offset int64 // running count of bytes written, for the trace output
}

func (d *ioWriter) Write(b []byte) (int, error) {
	n, err := d.writer.Write(b)
	fmt.Printf("%s: Write(%d) @%d => %d %v \n %q\n", d.prefix, len(b), d.offset, n, err, b[:n])
	d.offset += int64(n)
	return n, err
}

var (
	// TRACEBUF is set from the "tracebuf" entry of the PARQUETGODEBUG
	// environment variable.
	TRACEBUF int
)

// init parses PARQUETGODEBUG, a comma-separated list of key=value options.
func init() {
	for _, arg := range strings.Split(os.Getenv("PARQUETGODEBUG"), ",") {
		k := arg
		v := ""
		i := strings.IndexByte(arg, '=')
		if i >= 0 {
			k, v = arg[:i], arg[i+1:]
		}
		var err error
		switch k {
		case "": // ignore empty entries
		case "tracebuf":
			if TRACEBUF, err = strconv.Atoi(v); err != nil {
				log.Printf("PARQUETGODEBUG: invalid value for tracebuf: %q", v)
			}
		default:
			log.Printf("PARQUETGODEBUG: unrecognized debug option: %q", k)
		}
	}
}

================================================ FILE: internal/debug/finalizer_off.go ================================================
//go:build debug

package debug

// SetFinalizer is a no-op when the debug tag is specified.
func SetFinalizer(interface{}, interface{}) {} ================================================ FILE: internal/debug/finalizer_on.go ================================================ //go:build !debug package debug import "runtime" func SetFinalizer(obj, finalizer interface{}) { runtime.SetFinalizer(obj, finalizer) } ================================================ FILE: internal/quick/quick.go ================================================ package quick import ( "fmt" "math" "math/rand" "reflect" "strings" "time" ) var DefaultConfig = Config{ Sizes: []int{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 99, 100, 101, 127, 128, 129, 255, 256, 257, 1000, 1023, 1024, 1025, 2000, 2095, 2048, 2049, 4000, 4095, 4096, 4097, }, Seed: 0, } // Check is inspired by the standard quick.Check package, but enhances the // API and tests arrays of larger sizes than the maximum of 50 hardcoded in // testing/quick. 
func Check(f interface{}) error { return DefaultConfig.Check(f) } type Config struct { Sizes []int Seed int64 } func (c *Config) Check(f interface{}) error { v := reflect.ValueOf(f) r := rand.New(rand.NewSource(c.Seed)) t := v.Type().In(0) makeValue := MakeValueFuncOf(t.Elem()) makeArray := func(n int) reflect.Value { array := reflect.MakeSlice(t, n, n) for i := 0; i < n; i++ { makeValue(array.Index(i), r) } return array } if makeArray == nil { panic("cannot run quick check on function with input of type " + v.Type().In(0).String()) } for _, n := range c.Sizes { for i := 0; i < 3; i++ { in := makeArray(n) ok := v.Call([]reflect.Value{in}) if !ok[0].Bool() { return fmt.Errorf("test #%d: failed on input of size %d: %#v\n", i+1, n, in.Interface()) } } } return nil } type MakeValueFunc func(reflect.Value, *rand.Rand) func MakeValueFuncOf(t reflect.Type) MakeValueFunc { switch t { case reflect.TypeOf(time.Time{}): return func(v reflect.Value, r *rand.Rand) { // TODO: This is a hack to support the matching of times in a precision // other than nanosecond by generating times rounded to the second. A // better solution would be to update columns types to add a compare // function. 
sec := r.Int63n(2524608000) // 2050-01-01 v.Set(reflect.ValueOf(time.Unix(sec, 0).UTC())) } } switch t.Kind() { case reflect.Bool: return func(v reflect.Value, r *rand.Rand) { v.SetBool((r.Int() % 2) != 0) } case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: return func(v reflect.Value, r *rand.Rand) { v.SetInt(r.Int63n(math.MaxInt32)) } case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: return func(v reflect.Value, r *rand.Rand) { v.SetUint(r.Uint64()) } case reflect.Float32, reflect.Float64: return func(v reflect.Value, r *rand.Rand) { v.SetFloat(r.Float64()) } case reflect.String: return func(v reflect.Value, r *rand.Rand) { const characters = "1234567890qwertyuiopasdfghjklzxcvbnm" s := new(strings.Builder) n := r.Intn(10) for i := 0; i < n; i++ { s.WriteByte(characters[i]) } v.SetString(s.String()) } case reflect.Array: makeElem := MakeValueFuncOf(t.Elem()) return func(v reflect.Value, r *rand.Rand) { for i, n := 0, v.Len(); i < n; i++ { makeElem(v.Index(i), r) } } case reflect.Slice: switch e := t.Elem(); e.Kind() { case reflect.Uint8: return func(v reflect.Value, r *rand.Rand) { b := make([]byte, r.Intn(50)) r.Read(b) v.SetBytes(b) } default: makeElem := MakeValueFuncOf(t.Elem()) return func(v reflect.Value, r *rand.Rand) { n := r.Intn(10) s := reflect.MakeSlice(t, n, n) for i := 0; i < n; i++ { makeElem(s.Index(i), r) } v.Set(s) } } case reflect.Map: makeKey := MakeValueFuncOf(t.Key()) makeElem := MakeValueFuncOf(t.Elem()) return func(v reflect.Value, r *rand.Rand) { m := reflect.MakeMap(t) n := r.Intn(10) k := reflect.New(t.Key()).Elem() e := reflect.New(t.Elem()).Elem() for i := 0; i < n; i++ { makeKey(k, r) makeElem(e, r) m.SetMapIndex(k, e) } v.Set(m) } case reflect.Struct: fields := make([]reflect.StructField, 0, t.NumField()) makeValues := make([]MakeValueFunc, 0, cap(fields)) for i, n := 0, cap(fields); i < n; i++ { if f := t.Field(i); f.PkgPath == "" { // skip unexported fields 
				fields = append(fields, f)
				makeValues = append(makeValues, MakeValueFuncOf(f.Type))
			}
		}
		return func(v reflect.Value, r *rand.Rand) {
			for i := range fields {
				makeValues[i](v.FieldByIndex(fields[i].Index), r)
			}
		}
	case reflect.Ptr:
		t = t.Elem()
		makeValue := MakeValueFuncOf(t)
		return func(v reflect.Value, r *rand.Rand) {
			v.Set(reflect.New(t))
			makeValue(v.Elem(), r)
		}
	default:
		panic("quick.Check does not support test values of type " + t.String())
	}
}

================================================ FILE: internal/unsafecast/unsafecast_go17.go ================================================
//go:build !go1.18

package unsafecast

import (
	"reflect"
	"unsafe"
)

// Pre-generics (Go 1.17) variants of the conversions in unsafecast_go18.go:
// each function reinterprets the backing array of its argument as a slice of
// another element type, scaling the length by the element size ratio.

func AddressOfBytes(data []byte) *byte { return *(**byte)(unsafe.Pointer(&data)) }

func AddressOfString(data string) *byte { return *(**byte)(unsafe.Pointer(&data)) }

func PointerOfValue(value reflect.Value) unsafe.Pointer {
	return (*[2]unsafe.Pointer)(unsafe.Pointer(&value))[1]
}

func BoolToBytes(data []bool) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), len(data)) }

func Int8ToBytes(data []int8) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), len(data)) }

func Int16ToBytes(data []int16) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 2*len(data)) }

func Int32ToBytes(data []int32) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 4*len(data)) }

func Int64ToBytes(data []int64) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 8*len(data)) }

func Float32ToBytes(data []float32) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 4*len(data)) }

func Float64ToBytes(data []float64) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 8*len(data)) }

func Int16ToUint16(data []int16) []uint16 { return unsafe.Slice(*(**uint16)(unsafe.Pointer(&data)), len(data)) }

func Int32ToUint32(data []int32) []uint32 { return unsafe.Slice(*(**uint32)(unsafe.Pointer(&data)), len(data)) }

func Int64ToUint64(data []int64) []uint64 { return unsafe.Slice(*(**uint64)(unsafe.Pointer(&data)), len(data)) }

func Float32ToUint32(data []float32) []uint32 { return unsafe.Slice(*(**uint32)(unsafe.Pointer(&data)), len(data)) }

func Float64ToUint64(data []float64) []uint64 { return unsafe.Slice(*(**uint64)(unsafe.Pointer(&data)), len(data)) }

func Uint32ToBytes(data []uint32) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 4*len(data)) }

func Uint64ToBytes(data []uint64) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 8*len(data)) }

func Uint128ToBytes(data [][16]byte) []byte { return unsafe.Slice(*(**byte)(unsafe.Pointer(&data)), 16*len(data)) }

func Uint32ToInt32(data []uint32) []int32 { return unsafe.Slice(*(**int32)(unsafe.Pointer(&data)), len(data)) }

func Uint32ToInt64(data []uint32) []int64 { return unsafe.Slice(*(**int64)(unsafe.Pointer(&data)), len(data)/2) }

func Uint64ToInt64(data []uint64) []int64 { return unsafe.Slice(*(**int64)(unsafe.Pointer(&data)), len(data)) }

func BytesToBool(data []byte) []bool { return unsafe.Slice(*(**bool)(unsafe.Pointer(&data)), len(data)) }

func BytesToInt8(data []byte) []int8 { return unsafe.Slice(*(**int8)(unsafe.Pointer(&data)), len(data)) }

func BytesToInt16(data []byte) []int16 { return unsafe.Slice(*(**int16)(unsafe.Pointer(&data)), len(data)/2) }

func BytesToInt32(data []byte) []int32 { return unsafe.Slice(*(**int32)(unsafe.Pointer(&data)), len(data)/4) }

func BytesToInt64(data []byte) []int64 { return unsafe.Slice(*(**int64)(unsafe.Pointer(&data)), len(data)/8) }

func BytesToUint32(data []byte) []uint32 { return unsafe.Slice(*(**uint32)(unsafe.Pointer(&data)), len(data)/4) }

func BytesToUint64(data []byte) []uint64 { return unsafe.Slice(*(**uint64)(unsafe.Pointer(&data)), len(data)/8) }

func BytesToUint128(data []byte) [][16]byte { return unsafe.Slice(*(**[16]byte)(unsafe.Pointer(&data)), len(data)/16) }

func BytesToFloat32(data []byte) []float32 { return unsafe.Slice(*(**float32)(unsafe.Pointer(&data)), len(data)/4) }

func
BytesToFloat64(data []byte) []float64 {
	return unsafe.Slice(*(**float64)(unsafe.Pointer(&data)), len(data)/8)
}

func BytesToString(data []byte) string { return *(*string)(unsafe.Pointer(&data)) }

func Bytes(data *byte, size int) []byte { return unsafe.Slice(data, size) }

================================================ FILE: internal/unsafecast/unsafecast_go18.go ================================================
//go:build go1.18

// Package unsafecast exposes functions to bypass the Go type system and perform
// conversions between types that would otherwise not be possible.
//
// The functions of this package are mostly useful as optimizations to avoid
// memory copies when converting between compatible memory layouts; for example,
// casting a [][16]byte to a []byte in order to use functions of the standard
// bytes package on the slices.
//
// With great power comes great responsibility.
package unsafecast

import (
	"reflect"
	"unsafe"
)

// AddressOf returns the address to the first element in data, even if the slice
// has length zero.
func AddressOf[T any](data []T) *T {
	return *(**T)(unsafe.Pointer(&data))
}

// AddressOfBytes returns the address of the first byte in data.
func AddressOfBytes(data []byte) *byte {
	return *(**byte)(unsafe.Pointer(&data))
}

// AddressOfString returns the address of the first byte in data.
func AddressOfString(data string) *byte {
	return *(**byte)(unsafe.Pointer(&data))
}

// PointerOf is like AddressOf but returns an unsafe.Pointer, losing type
// information about the underlying data.
func PointerOf[T any](data []T) unsafe.Pointer {
	return unsafe.Pointer(AddressOf(data))
}

// PointerOfString is like AddressOfString but returns an unsafe.Pointer, losing
// type information about the underlying data.
func PointerOfString(data string) unsafe.Pointer {
	return unsafe.Pointer(AddressOfString(data))
}

// PointerOfValue returns the address of the object packed in the given value.
//
// This function is like value.UnsafePointer but works for any underlying type,
// bypassing the safety checks done by the reflect package.
func PointerOfValue(value reflect.Value) unsafe.Pointer {
	return (*[2]unsafe.Pointer)(unsafe.Pointer(&value))[1]
}

// The slice type represents the memory layout of slices in Go. It is similar to
// reflect.SliceHeader but uses an unsafe.Pointer instead of uintptr for the
// backing array to allow the garbage collector to track the reference.
type slice struct {
	ptr unsafe.Pointer
	len int
	cap int
}

// Slice converts the data slice of type []From to a slice of type []To sharing
// the same backing array. The length and capacity of the returned slice are
// scaled according to the size difference between the source and destination
// types.
//
// Note that the function does not perform any checks to ensure that the memory
// layouts of the types are compatible, it is possible to cause memory
// corruption if the layouts mismatch (e.g. the pointers in the From are different
// than the pointers in To).
func Slice[To, From any](data []From) []To {
	// This function could use unsafe.Slice but it would drop the capacity
	// information, so instead we implement the type conversion.
	var zf From
	var zt To
	var s = (*slice)(unsafe.Pointer(&data))
	s.len = int((uintptr(s.len) * unsafe.Sizeof(zf)) / unsafe.Sizeof(zt))
	s.cap = int((uintptr(s.cap) * unsafe.Sizeof(zf)) / unsafe.Sizeof(zt))
	return *(*[]To)(unsafe.Pointer(s))
}

// Bytes constructs a byte slice. The pointer to the first element of the slice
// is set to data, the length and capacity are set to size.
func Bytes(data *byte, size int) []byte {
	return *(*[]byte)(unsafe.Pointer(&slice{
		ptr: unsafe.Pointer(data),
		len: size,
		cap: size,
	}))
}

// BytesToString converts a byte slice to a string value. The returned string
// shares the backing array of the byte slice.
// // Programs using this function are responsible for ensuring that the data slice // is not modified while the returned string is in use, otherwise the guarantee // of immutability of Go string values will be violated, resulting in undefined // behavior. func BytesToString(data []byte) string { return *(*string)(unsafe.Pointer(&data)) } // StringToBytes applies the inverse conversion of BytesToString. func StringToBytes(data string) []byte { return *(*[]byte)(unsafe.Pointer(&slice{ ptr: PointerOfString(data), len: len(data), cap: len(data), })) } // ----------------------------------------------------------------------------- // TODO: the functions below are used for backward compatibility with Go 1.17 // where generics weren't available. We should remove them and inline calls to // unsafecast.Slice when we change our minimum supported Go version to 1.18. // ----------------------------------------------------------------------------- func BoolToBytes(data []bool) []byte { return Slice[byte](data) } func Int8ToBytes(data []int8) []byte { return Slice[byte](data) } func Int16ToBytes(data []int16) []byte { return Slice[byte](data) } func Int32ToBytes(data []int32) []byte { return Slice[byte](data) } func Int64ToBytes(data []int64) []byte { return Slice[byte](data) } func Float32ToBytes(data []float32) []byte { return Slice[byte](data) } func Float64ToBytes(data []float64) []byte { return Slice[byte](data) } func Uint32ToBytes(data []uint32) []byte { return Slice[byte](data) } func Uint64ToBytes(data []uint64) []byte { return Slice[byte](data) } func Uint128ToBytes(data [][16]byte) []byte { return Slice[byte](data) } func Int16ToUint16(data []int16) []uint16 { return Slice[uint16](data) } func Int32ToUint32(data []int32) []uint32 { return Slice[uint32](data) } func Int64ToUint64(data []int64) []uint64 { return Slice[uint64](data) } func Float32ToUint32(data []float32) []uint32 { return Slice[uint32](data) } func Float64ToUint64(data []float64) []uint64 { return 
Slice[uint64](data) } func Uint32ToInt32(data []uint32) []int32 { return Slice[int32](data) } func Uint32ToInt64(data []uint32) []int64 { return Slice[int64](data) } func Uint64ToInt64(data []uint64) []int64 { return Slice[int64](data) } func BytesToBool(data []byte) []bool { return Slice[bool](data) } func BytesToInt8(data []byte) []int8 { return Slice[int8](data) } func BytesToInt16(data []byte) []int16 { return Slice[int16](data) } func BytesToInt32(data []byte) []int32 { return Slice[int32](data) } func BytesToInt64(data []byte) []int64 { return Slice[int64](data) } func BytesToUint32(data []byte) []uint32 { return Slice[uint32](data) } func BytesToUint64(data []byte) []uint64 { return Slice[uint64](data) } func BytesToUint128(data []byte) [][16]byte { return Slice[[16]byte](data) } func BytesToFloat32(data []byte) []float32 { return Slice[float32](data) } func BytesToFloat64(data []byte) []float64 { return Slice[float64](data) } ================================================ FILE: internal/unsafecast/unsafecast_go18_test.go ================================================ //go:build go1.18 package unsafecast_test import ( "testing" "github.com/segmentio/parquet-go/internal/unsafecast" ) func TestUnsafeCastSlice(t *testing.T) { a := make([]uint32, 4, 13) a[0] = 1 a[1] = 0 a[2] = 2 a[3] = 0 b := unsafecast.Slice[int64](a) if len(b) != 2 { // (4 * sizeof(uint32)) / sizeof(int64) t.Fatalf("length mismatch: want=2 got=%d", len(b)) } if cap(b) != 6 { // (13 * sizeof(uint32)) / sizeof(int64) t.Fatalf("capacity mismatch: want=7 got=%d", cap(b)) } if b[0] != 1 { t.Errorf("wrong value at index 0: want=1 got=%d", b[0]) } if b[1] != 2 { t.Errorf("wrong value at index 1: want=2 got=%d", b[1]) } c := unsafecast.Slice[uint32](b) if len(c) != 4 { t.Fatalf("length mismatch: want=2 got=%d", len(b)) } if cap(c) != 12 { t.Fatalf("capacity mismatch: want=7 got=%d", cap(b)) } for i := range c { if c[i] != a[i] { t.Errorf("wrong value at index %d: want=%d got=%d", i, a[i], c[i]) } 
} } ================================================ FILE: level.go ================================================ package parquet import "github.com/segmentio/parquet-go/internal/bytealg" func countLevelsEqual(levels []byte, value byte) int { return bytealg.Count(levels, value) } func countLevelsNotEqual(levels []byte, value byte) int { return len(levels) - countLevelsEqual(levels, value) } func appendLevel(levels []byte, value byte, count int) []byte { i := len(levels) n := len(levels) + count if cap(levels) < n { newLevels := make([]byte, n, 2*n) copy(newLevels, levels) levels = newLevels } else { levels = levels[:n] } bytealg.Broadcast(levels[i:], value) return levels } ================================================ FILE: limits.go ================================================ package parquet import ( "fmt" "math" ) const ( // MaxColumnDepth is the maximum column depth supported by this package. MaxColumnDepth = math.MaxInt8 // MaxColumnIndex is the maximum column index supported by this package. MaxColumnIndex = math.MaxInt16 // MaxRepetitionLevel is the maximum repetition level supported by this // package. MaxRepetitionLevel = math.MaxUint8 // MaxDefinitionLevel is the maximum definition level supported by this // package. MaxDefinitionLevel = math.MaxUint8 // MaxRowGroups is the maximum number of row groups which can be contained // in a single parquet file. // // This limit is enforced by the use of 16 bits signed integers in the file // metadata footer of parquet files. It is part of the parquet specification // and therefore cannot be changed. 
MaxRowGroups = math.MaxInt16 ) const ( estimatedSizeOfByteArrayValues = 20 ) func makeRepetitionLevel(i int) byte { checkIndexRange("repetition level", i, 0, MaxRepetitionLevel) return byte(i) } func makeDefinitionLevel(i int) byte { checkIndexRange("definition level", i, 0, MaxDefinitionLevel) return byte(i) } func makeColumnIndex(i int) int16 { checkIndexRange("column index", i, 0, MaxColumnIndex) return int16(i) } func makeNumValues(i int) int32 { checkIndexRange("number of values", i, 0, math.MaxInt32) return int32(i) } func checkIndexRange(typ string, i, min, max int) { if i < min || i > max { panic(errIndexOutOfRange(typ, i, min, max)) } } func errIndexOutOfRange(typ string, i, min, max int) error { return fmt.Errorf("%s out of range: %d not in [%d:%d]", typ, i, min, max) } ================================================ FILE: merge.go ================================================ package parquet import ( "container/heap" "fmt" "io" ) // MergeRowGroups constructs a row group which is a merged view of rowGroups. If // rowGroups are sorted and the passed options include sorting, the merged row // group will also be sorted. // // The function validates the input to ensure that the merge operation is // possible, ensuring that the schemas match or can be converted to an // optionally configured target schema passed as argument in the option list. // // The sorting columns of each row group are also consulted to determine whether // the output can be represented. If sorting columns are configured on the merge // they must be a prefix of sorting columns of all row groups being merged. func MergeRowGroups(rowGroups []RowGroup, options ...RowGroupOption) (RowGroup, error) { config, err := NewRowGroupConfig(options...) 
if err != nil { return nil, err } schema := config.Schema if len(rowGroups) == 0 { return newEmptyRowGroup(schema), nil } if schema == nil { schema = rowGroups[0].Schema() for _, rowGroup := range rowGroups[1:] { if !nodesAreEqual(schema, rowGroup.Schema()) { return nil, ErrRowGroupSchemaMismatch } } } mergedRowGroups := make([]RowGroup, len(rowGroups)) copy(mergedRowGroups, rowGroups) for i, rowGroup := range mergedRowGroups { if rowGroupSchema := rowGroup.Schema(); !nodesAreEqual(schema, rowGroupSchema) { conv, err := Convert(schema, rowGroupSchema) if err != nil { return nil, fmt.Errorf("cannot merge row groups: %w", err) } mergedRowGroups[i] = ConvertRowGroup(rowGroup, conv) } } m := &mergedRowGroup{sorting: config.Sorting.SortingColumns} m.init(schema, mergedRowGroups) if len(m.sorting) == 0 { // When the row group has no ordering, use a simpler version of the // merger which simply concatenates rows from each of the row groups. // This is preferable because it makes the output deterministic, the // heap merge may otherwise reorder rows across groups. return &m.multiRowGroup, nil } for _, rowGroup := range m.rowGroups { if !sortingColumnsHavePrefix(rowGroup.SortingColumns(), m.sorting) { return nil, ErrRowGroupSortingColumnsMismatch } } m.compare = compareRowsFuncOf(schema, m.sorting) return m, nil } type mergedRowGroup struct { multiRowGroup sorting []SortingColumn compare func(Row, Row) int } func (m *mergedRowGroup) SortingColumns() []SortingColumn { return m.sorting } func (m *mergedRowGroup) Rows() Rows { // The row group needs to respect a sorting order; the merged row reader // uses a heap to merge rows from the row groups. 
rows := make([]Rows, len(m.rowGroups)) for i := range rows { rows[i] = m.rowGroups[i].Rows() } return &mergedRowGroupRows{ merge: mergedRowReader{ compare: m.compare, readers: makeBufferedRowReaders(len(rows), func(i int) RowReader { return rows[i] }), }, rows: rows, schema: m.schema, } } type mergedRowGroupRows struct { merge mergedRowReader rowIndex int64 seekToRow int64 rows []Rows schema *Schema } func (r *mergedRowGroupRows) readInternal(rows []Row) (int, error) { n, err := r.merge.ReadRows(rows) r.rowIndex += int64(n) return n, err } func (r *mergedRowGroupRows) Close() (lastErr error) { r.merge.close() r.rowIndex = 0 r.seekToRow = 0 for _, rows := range r.rows { if err := rows.Close(); err != nil { lastErr = err } } return lastErr } func (r *mergedRowGroupRows) ReadRows(rows []Row) (int, error) { for r.rowIndex < r.seekToRow { n := int(r.seekToRow - r.rowIndex) if n > len(rows) { n = len(rows) } n, err := r.readInternal(rows[:n]) if err != nil { return 0, err } } return r.readInternal(rows) } func (r *mergedRowGroupRows) SeekToRow(rowIndex int64) error { if rowIndex >= r.rowIndex { r.seekToRow = rowIndex return nil } return fmt.Errorf("SeekToRow: merged row reader cannot seek backward from row %d to %d", r.rowIndex, rowIndex) } func (r *mergedRowGroupRows) Schema() *Schema { return r.schema } // MergeRowReader constructs a RowReader which creates an ordered sequence of // all the readers using the given compare function as the ordering predicate. 
func MergeRowReaders(readers []RowReader, compare func(Row, Row) int) RowReader { return &mergedRowReader{ compare: compare, readers: makeBufferedRowReaders(len(readers), func(i int) RowReader { return readers[i] }), } } func makeBufferedRowReaders(numReaders int, readerAt func(int) RowReader) []*bufferedRowReader { buffers := make([]bufferedRowReader, numReaders) readers := make([]*bufferedRowReader, numReaders) for i := range readers { buffers[i].rows = readerAt(i) readers[i] = &buffers[i] } return readers } type mergedRowReader struct { compare func(Row, Row) int readers []*bufferedRowReader initialized bool } func (m *mergedRowReader) initialize() error { for i, r := range m.readers { switch err := r.read(); err { case nil: case io.EOF: m.readers[i] = nil default: m.readers = nil return err } } n := 0 for _, r := range m.readers { if r != nil { m.readers[n] = r n++ } } clear := m.readers[n:] for i := range clear { clear[i] = nil } m.readers = m.readers[:n] heap.Init(m) return nil } func (m *mergedRowReader) close() { for _, r := range m.readers { r.close() } m.readers = nil } func (m *mergedRowReader) ReadRows(rows []Row) (n int, err error) { if !m.initialized { m.initialized = true if err := m.initialize(); err != nil { return 0, err } } for n < len(rows) && len(m.readers) != 0 { r := m.readers[0] rows[n] = append(rows[n][:0], r.head()...) 
n++ if err := r.next(); err != nil { if err != io.EOF { return n, err } heap.Pop(m) } else { heap.Fix(m, 0) } } if len(m.readers) == 0 { err = io.EOF } return n, err } func (m *mergedRowReader) Less(i, j int) bool { return m.compare(m.readers[i].head(), m.readers[j].head()) < 0 } func (m *mergedRowReader) Len() int { return len(m.readers) } func (m *mergedRowReader) Swap(i, j int) { m.readers[i], m.readers[j] = m.readers[j], m.readers[i] } func (m *mergedRowReader) Push(x interface{}) { panic("NOT IMPLEMENTED") } func (m *mergedRowReader) Pop() interface{} { i := len(m.readers) - 1 r := m.readers[i] m.readers = m.readers[:i] return r } type bufferedRowReader struct { rows RowReader off int32 end int32 buf [10]Row } func (r *bufferedRowReader) head() Row { return r.buf[r.off] } func (r *bufferedRowReader) next() error { if r.off++; r.off == r.end { r.off = 0 r.end = 0 return r.read() } return nil } func (r *bufferedRowReader) read() error { if r.rows == nil { return io.EOF } n, err := r.rows.ReadRows(r.buf[r.end:]) if err != nil && n == 0 { return err } r.end += int32(n) return nil } func (r *bufferedRowReader) close() { r.rows = nil r.off = 0 r.end = 0 } var ( _ RowReaderWithSchema = (*mergedRowGroupRows)(nil) ) ================================================ FILE: merge_test.go ================================================ package parquet_test import ( "bytes" "errors" "fmt" "io" "math/rand" "sort" "testing" "github.com/segmentio/parquet-go" ) const ( numRowGroups = 3 rowsPerGroup = benchmarkNumRows ) type wrappedRowGroup struct { parquet.RowGroup rowsCallback func(parquet.Rows) parquet.Rows } func (r wrappedRowGroup) Rows() parquet.Rows { return r.rowsCallback(r.RowGroup.Rows()) } type wrappedRows struct { parquet.Rows closed bool } func (r *wrappedRows) Close() error { r.closed = true return r.Rows.Close() } func TestMergeRowGroups(t *testing.T) { tests := []struct { scenario string options []parquet.RowGroupOption input []parquet.RowGroup output 
parquet.RowGroup }{ { scenario: "no row groups", options: []parquet.RowGroupOption{ parquet.SchemaOf(Person{}), }, output: sortedRowGroup( []parquet.RowGroupOption{ parquet.SchemaOf(Person{}), }, ), }, { scenario: "a single row group", input: []parquet.RowGroup{ sortedRowGroup(nil, Person{FirstName: "some", LastName: "one", Age: 30}, Person{FirstName: "some", LastName: "one else", Age: 31}, Person{FirstName: "and", LastName: "you", Age: 32}, ), }, output: sortedRowGroup(nil, Person{FirstName: "some", LastName: "one", Age: 30}, Person{FirstName: "some", LastName: "one else", Age: 31}, Person{FirstName: "and", LastName: "you", Age: 32}, ), }, { scenario: "two row groups without ordering", input: []parquet.RowGroup{ sortedRowGroup(nil, Person{FirstName: "some", LastName: "one", Age: 30}), sortedRowGroup(nil, Person{FirstName: "some", LastName: "one else", Age: 31}), }, output: sortedRowGroup(nil, Person{FirstName: "some", LastName: "one", Age: 30}, Person{FirstName: "some", LastName: "one else", Age: 31}, ), }, { scenario: "three row groups without ordering", input: []parquet.RowGroup{ sortedRowGroup(nil, Person{FirstName: "some", LastName: "one", Age: 30}), sortedRowGroup(nil, Person{FirstName: "some", LastName: "one else", Age: 31}), sortedRowGroup(nil, Person{FirstName: "question", LastName: "answer", Age: 42}), }, output: sortedRowGroup(nil, Person{FirstName: "some", LastName: "one", Age: 30}, Person{FirstName: "some", LastName: "one else", Age: 31}, Person{FirstName: "question", LastName: "answer", Age: 42}, ), }, { scenario: "row groups sorted by ascending last name", options: []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, input: []parquet.RowGroup{ sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, Person{FirstName: "Han", LastName: "Solo"}, Person{FirstName: "Luke", LastName: "Skywalker"}, ), 
sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, Person{FirstName: "Obiwan", LastName: "Kenobi"}, ), }, output: sortedRowGroup(nil, Person{FirstName: "Obiwan", LastName: "Kenobi"}, Person{FirstName: "Luke", LastName: "Skywalker"}, Person{FirstName: "Han", LastName: "Solo"}, ), }, { scenario: "row groups sorted by descending last name", options: []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Descending("LastName"), ), ), }, input: []parquet.RowGroup{ sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Descending("LastName"), ), ), }, Person{FirstName: "Han", LastName: "Solo"}, Person{FirstName: "Luke", LastName: "Skywalker"}, ), sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Descending("LastName"), ), ), }, Person{FirstName: "Obiwan", LastName: "Kenobi"}, ), }, output: sortedRowGroup(nil, Person{FirstName: "Han", LastName: "Solo"}, Person{FirstName: "Luke", LastName: "Skywalker"}, Person{FirstName: "Obiwan", LastName: "Kenobi"}, ), }, { scenario: "row groups sorted by ascending last and first name", options: []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), parquet.Ascending("FirstName"), ), ), }, input: []parquet.RowGroup{ sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), parquet.Ascending("FirstName"), ), ), }, Person{FirstName: "Luke", LastName: "Skywalker"}, Person{FirstName: "Han", LastName: "Solo"}, ), sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), parquet.Ascending("FirstName"), ), ), }, Person{FirstName: "Obiwan", LastName: "Kenobi"}, Person{FirstName: "Anakin", LastName: "Skywalker"}, ), }, output: 
sortedRowGroup(nil, Person{FirstName: "Obiwan", LastName: "Kenobi"}, Person{FirstName: "Anakin", LastName: "Skywalker"}, Person{FirstName: "Luke", LastName: "Skywalker"}, Person{FirstName: "Han", LastName: "Solo"}, ), }, { scenario: "row groups with conversion to a different schema", options: []parquet.RowGroupOption{ parquet.SchemaOf(LastNameOnly{}), parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, input: []parquet.RowGroup{ sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, Person{FirstName: "Han", LastName: "Solo"}, Person{FirstName: "Luke", LastName: "Skywalker"}, ), sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, Person{FirstName: "Obiwan", LastName: "Kenobi"}, Person{FirstName: "Anakin", LastName: "Skywalker"}, ), }, output: sortedRowGroup( []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("LastName"), ), ), }, LastNameOnly{LastName: "Solo"}, LastNameOnly{LastName: "Skywalker"}, LastNameOnly{LastName: "Skywalker"}, LastNameOnly{LastName: "Kenobi"}, ), }, } for _, adapter := range []struct { scenario string function func(parquet.RowGroup) parquet.RowGroup }{ {scenario: "buffer", function: selfRowGroup}, {scenario: "file", function: fileRowGroup}, } { t.Run(adapter.scenario, func(t *testing.T) { for _, test := range tests { t.Run(test.scenario, func(t *testing.T) { input := make([]parquet.RowGroup, len(test.input)) for i := range test.input { input[i] = adapter.function(test.input[i]) } merged, err := parquet.MergeRowGroups(test.input, test.options...) 
if err != nil { t.Fatal(err) } if merged.NumRows() != test.output.NumRows() { t.Fatalf("the number of rows mismatch: want=%d got=%d", merged.NumRows(), test.output.NumRows()) } if merged.Schema() != test.output.Schema() { t.Fatalf("the row group schemas mismatch:\n%v\n%v", test.output.Schema(), merged.Schema()) } options := []parquet.RowGroupOption{parquet.SchemaOf(Person{})} options = append(options, test.options...) // We test two views of the resulting row group: the one originally // returned by MergeRowGroups, and one where the merged row group // has been copied into a new buffer. The intent is to exercise both // the row-by-row read as well as optimized code paths when CopyRows // bypasses the ReadRow/WriteRow calls and the row group is written // directly to the buffer by calling WriteRowsTo/WriteRowGroup. mergedCopy := parquet.NewBuffer(options...) totalRows := test.output.NumRows() numRows, err := copyRowsAndClose(mergedCopy, merged.Rows()) if err != nil { t.Fatal(err) } if numRows != totalRows { t.Fatalf("wrong number of rows copied: want=%d got=%d", totalRows, numRows) } for _, merge := range []struct { scenario string rowGroup parquet.RowGroup }{ {scenario: "self", rowGroup: merged}, {scenario: "copy", rowGroup: mergedCopy}, } { t.Run(merge.scenario, func(t *testing.T) { var expectedRows = test.output.Rows() var mergedRows = merge.rowGroup.Rows() var row1 = make([]parquet.Row, 1) var row2 = make([]parquet.Row, 1) var numRows int64 defer expectedRows.Close() defer mergedRows.Close() for { _, err1 := expectedRows.ReadRows(row1) n, err2 := mergedRows.ReadRows(row2) if err1 != err2 { // ReadRows may or may not return io.EOF // when it reads the last row, so we test // that the reference RowReader has also // reached the end. 
if err1 == nil && err2 == io.EOF { _, err1 = expectedRows.ReadRows(row1[:0]) } if err1 != io.EOF { t.Fatalf("errors mismatched while comparing row %d/%d: want=%v got=%v", numRows, totalRows, err1, err2) } } if n != 0 { if !row1[0].Equal(row2[0]) { t.Errorf("row at index %d/%d mismatch: want=%+v got=%+v", numRows, totalRows, row1[0], row2[0]) } numRows++ } if err1 != nil { break } } if numRows != totalRows { t.Errorf("expected to read %d rows but %d were found", totalRows, numRows) } }) } }) } }) } } func TestMergeRowGroupsCursorsAreClosed(t *testing.T) { type model struct { A int } schema := parquet.SchemaOf(model{}) options := []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending(schema.Columns()[0]...), ), ), } prng := rand.New(rand.NewSource(0)) rowGroups := make([]parquet.RowGroup, numRowGroups) rows := make([]*wrappedRows, 0, numRowGroups) for i := range rowGroups { rowGroups[i] = wrappedRowGroup{ RowGroup: sortedRowGroup(options, randomRowsOf(prng, rowsPerGroup, model{})...), rowsCallback: func(r parquet.Rows) parquet.Rows { wrapped := &wrappedRows{Rows: r} rows = append(rows, wrapped) return wrapped }, } } m, err := parquet.MergeRowGroups(rowGroups, options...) if err != nil { t.Fatal(err) } func() { mergedRows := m.Rows() defer mergedRows.Close() // Add 1 more slot to the buffer to force an io.EOF on the first read. 
rbuf := make([]parquet.Row, (numRowGroups*rowsPerGroup)+1) if _, err := mergedRows.ReadRows(rbuf); !errors.Is(err, io.EOF) { t.Fatal(err) } }() for i, wrapped := range rows { if !wrapped.closed { t.Fatalf("RowGroup %d not closed", i) } } } func TestMergeRowGroupsSeekToRow(t *testing.T) { type model struct { A int } schema := parquet.SchemaOf(model{}) options := []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending(schema.Columns()[0]...), ), ), } rowGroups := make([]parquet.RowGroup, numRowGroups) counter := 0 for i := range rowGroups { rows := make([]interface{}, 0, rowsPerGroup) for j := 0; j < rowsPerGroup; j++ { rows = append(rows, model{A: counter}) counter++ } rowGroups[i] = sortedRowGroup(options, rows...) } m, err := parquet.MergeRowGroups(rowGroups, options...) if err != nil { t.Fatal(err) } func() { mergedRows := m.Rows() defer mergedRows.Close() rbuf := make([]parquet.Row, 1) cursor := int64(0) for { if err := mergedRows.SeekToRow(cursor); err != nil { t.Fatal(err) } if _, err := mergedRows.ReadRows(rbuf); err != nil { if errors.Is(err, io.EOF) { break } t.Fatal(err) } v := model{} if err := schema.Reconstruct(&v, rbuf[0]); err != nil { t.Fatal(err) } if v.A != int(cursor) { t.Fatalf("expected value %d, got %d", cursor, v.A) } cursor++ } }() } func BenchmarkMergeRowGroups(b *testing.B) { for _, test := range readerTests { b.Run(test.scenario, func(b *testing.B) { schema := parquet.SchemaOf(test.model) options := []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending(schema.Columns()[0]...), ), ), } prng := rand.New(rand.NewSource(0)) rowGroups := make([]parquet.RowGroup, numRowGroups) for i := range rowGroups { rowGroups[i] = sortedRowGroup(options, randomRowsOf(prng, rowsPerGroup, test.model)...) 
} for n := 1; n <= numRowGroups; n++ { b.Run(fmt.Sprintf("groups=%d,rows=%d", n, n*rowsPerGroup), func(b *testing.B) { mergedRowGroup, err := parquet.MergeRowGroups(rowGroups[:n], options...) if err != nil { b.Fatal(err) } rows := mergedRowGroup.Rows() rbuf := make([]parquet.Row, benchmarkRowsPerStep) defer func() { rows.Close() }() benchmarkRowsPerSecond(b, func() int { n, err := rows.ReadRows(rbuf) if err != nil { if !errors.Is(err, io.EOF) { b.Fatal(err) } rows.Close() rows = mergedRowGroup.Rows() } return n }) }) } }) } } func BenchmarkMergeFiles(b *testing.B) { rowGroupBuffers := make([]bytes.Buffer, numRowGroups) for _, test := range readerTests { b.Run(test.scenario, func(b *testing.B) { schema := parquet.SchemaOf(test.model) sortingOptions := []parquet.SortingOption{ parquet.SortingColumns( parquet.Ascending(schema.Columns()[0]...), ), } options := []parquet.RowGroupOption{ schema, parquet.SortingRowGroupConfig( sortingOptions..., ), } buffer := parquet.NewBuffer(options...) prng := rand.New(rand.NewSource(0)) files := make([]*parquet.File, numRowGroups) rowGroups := make([]parquet.RowGroup, numRowGroups) for i := range files { for _, row := range randomRowsOf(prng, rowsPerGroup, test.model) { buffer.Write(row) } sort.Sort(buffer) rowGroupBuffers[i].Reset() writer := parquet.NewWriter(&rowGroupBuffers[i], schema, parquet.SortingWriterConfig( sortingOptions..., ), ) _, err := copyRowsAndClose(writer, buffer.Rows()) if err != nil { b.Fatal(err) } if err := writer.Close(); err != nil { b.Fatal(err) } r := bytes.NewReader(rowGroupBuffers[i].Bytes()) f, err := parquet.OpenFile(r, r.Size()) if err != nil { b.Fatal(err) } files[i], rowGroups[i] = f, f.RowGroups()[0] } for n := 1; n <= numRowGroups; n++ { b.Run(fmt.Sprintf("groups=%d,rows=%d", n, n*rowsPerGroup), func(b *testing.B) { mergedRowGroup, err := parquet.MergeRowGroups(rowGroups[:n], options...) 
if err != nil { b.Fatal(err) } rows := mergedRowGroup.Rows() rbuf := make([]parquet.Row, benchmarkRowsPerStep) defer func() { rows.Close() }() benchmarkRowsPerSecond(b, func() int { n, err := rows.ReadRows(rbuf) if err != nil { if !errors.Is(err, io.EOF) { b.Fatal(err) } rows.Close() rows = mergedRowGroup.Rows() } return n }) totalSize := int64(0) for _, f := range files[:n] { totalSize += f.Size() } }) } }) } } ================================================ FILE: multi_row_group.go ================================================ package parquet import ( "io" ) // MultiRowGroup wraps multiple row groups to appear as if it was a single // RowGroup. RowGroups must have the same schema or it will error. func MultiRowGroup(rowGroups ...RowGroup) RowGroup { return newMultiRowGroup(ReadModeSync, rowGroups...) } func newMultiRowGroup(pageReadMode ReadMode, rowGroups ...RowGroup) RowGroup { if len(rowGroups) == 0 { return &emptyRowGroup{} } if len(rowGroups) == 1 { return rowGroups[0] } schema, err := compatibleSchemaOf(rowGroups) if err != nil { panic(err) } rowGroupsCopy := make([]RowGroup, len(rowGroups)) copy(rowGroupsCopy, rowGroups) c := &multiRowGroup{ pageReadMode: pageReadMode, } c.init(schema, rowGroupsCopy) return c } func (c *multiRowGroup) init(schema *Schema, rowGroups []RowGroup) error { columns := make([]multiColumnChunk, len(schema.Columns())) rowGroupColumnChunks := make([][]ColumnChunk, len(rowGroups)) for i, rowGroup := range rowGroups { rowGroupColumnChunks[i] = rowGroup.ColumnChunks() } for i := range columns { columns[i].rowGroup = c columns[i].column = i columns[i].chunks = make([]ColumnChunk, len(rowGroupColumnChunks)) for j, columnChunks := range rowGroupColumnChunks { columns[i].chunks[j] = columnChunks[i] } } c.schema = schema c.rowGroups = rowGroups c.columns = make([]ColumnChunk, len(columns)) for i := range columns { c.columns[i] = &columns[i] } return nil } func compatibleSchemaOf(rowGroups []RowGroup) (*Schema, error) { schema := 
rowGroups[0].Schema() // Fast path: Many times all row groups have the exact same schema so a // pointer comparison is cheaper. samePointer := true for _, rowGroup := range rowGroups[1:] { if rowGroup.Schema() != schema { samePointer = false break } } if samePointer { return schema, nil } // Slow path: The schema pointers are not the same, but they still have to // be compatible. for _, rowGroup := range rowGroups[1:] { if !nodesAreEqual(schema, rowGroup.Schema()) { return nil, ErrRowGroupSchemaMismatch } } return schema, nil } type multiRowGroup struct { schema *Schema rowGroups []RowGroup columns []ColumnChunk pageReadMode ReadMode } func (c *multiRowGroup) NumRows() (numRows int64) { for _, rowGroup := range c.rowGroups { numRows += rowGroup.NumRows() } return numRows } func (c *multiRowGroup) ColumnChunks() []ColumnChunk { return c.columns } func (c *multiRowGroup) SortingColumns() []SortingColumn { return nil } func (c *multiRowGroup) Schema() *Schema { return c.schema } func (c *multiRowGroup) Rows() Rows { return newRowGroupRows(c, c.pageReadMode) } type multiColumnChunk struct { rowGroup *multiRowGroup column int chunks []ColumnChunk } func (c *multiColumnChunk) Type() Type { if len(c.chunks) != 0 { return c.chunks[0].Type() // all chunks should be of the same type } return nil } func (c *multiColumnChunk) NumValues() int64 { n := int64(0) for i := range c.chunks { n += c.chunks[i].NumValues() } return n } func (c *multiColumnChunk) Column() int { return c.column } func (c *multiColumnChunk) Pages() Pages { return &multiPages{column: c} } func (c *multiColumnChunk) ColumnIndex() ColumnIndex { // TODO: implement return nil } func (c *multiColumnChunk) OffsetIndex() OffsetIndex { // TODO: implement return nil } func (c *multiColumnChunk) BloomFilter() BloomFilter { return multiBloomFilter{c} } type multiBloomFilter struct{ *multiColumnChunk } func (f multiBloomFilter) ReadAt(b []byte, off int64) (int, error) { // TODO: add a test for this function i := 0 for 
i < len(f.chunks) { if r := f.chunks[i].BloomFilter(); r != nil { size := r.Size() if off < size { break } off -= size } i++ } if i == len(f.chunks) { return 0, io.EOF } rn := int(0) for len(b) > 0 { if r := f.chunks[i].BloomFilter(); r != nil { n, err := r.ReadAt(b, off) rn += n if err != nil { return rn, err } if b = b[n:]; len(b) == 0 { return rn, nil } off += int64(n) } i++ } if i == len(f.chunks) { return rn, io.EOF } return rn, nil } func (f multiBloomFilter) Size() int64 { size := int64(0) for _, c := range f.chunks { if b := c.BloomFilter(); b != nil { size += b.Size() } } return size } func (f multiBloomFilter) Check(v Value) (bool, error) { for _, c := range f.chunks { if b := c.BloomFilter(); b != nil { if ok, err := b.Check(v); ok || err != nil { return ok, err } } } return false, nil } type multiPages struct { pages Pages index int column *multiColumnChunk } func (m *multiPages) ReadPage() (Page, error) { for { if m.pages != nil { p, err := m.pages.ReadPage() if err == nil || err != io.EOF { return p, err } if err := m.pages.Close(); err != nil { return nil, err } m.pages = nil } if m.column == nil || m.index == len(m.column.chunks) { return nil, io.EOF } m.pages = m.column.chunks[m.index].Pages() m.index++ } } func (m *multiPages) SeekToRow(rowIndex int64) error { if m.column == nil { return io.ErrClosedPipe } if m.pages != nil { if err := m.pages.Close(); err != nil { return err } } rowGroups := m.column.rowGroup.rowGroups numRows := int64(0) m.pages = nil m.index = 0 for m.index < len(rowGroups) { numRows = rowGroups[m.index].NumRows() if rowIndex < numRows { break } rowIndex -= numRows m.index++ } if m.index < len(rowGroups) { m.pages = m.column.chunks[m.index].Pages() m.index++ return m.pages.SeekToRow(rowIndex) } return nil } func (m *multiPages) Close() (err error) { if m.pages != nil { err = m.pages.Close() } m.pages = nil m.index = 0 m.column = nil return err } ================================================ FILE: node.go 
================================================
package parquet

import (
	"reflect"
	"sort"
	"unicode"
	"unicode/utf8"

	"github.com/segmentio/parquet-go/compress"
	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
)

// Node values represent nodes of a parquet schema.
//
// Nodes carry the type of values, as well as properties like whether the values
// are optional or repeat. Nodes with one or more children represent parquet
// groups and therefore do not have a logical type.
//
// Nodes are immutable values and therefore safe to use concurrently from
// multiple goroutines.
type Node interface {
	// Returns a human-readable representation of the parquet node.
	String() string

	// For leaf nodes, returns the type of values of the parquet column.
	//
	// Calling this method on non-leaf nodes will panic.
	Type() Type

	// Returns whether the parquet column is optional.
	Optional() bool

	// Returns whether the parquet column is repeated.
	Repeated() bool

	// Returns whether the parquet column is required.
	Required() bool

	// Returns true if this is a leaf node.
	Leaf() bool

	// Returns a mapping of the node's fields.
	//
	// As an optimization, the same slices may be returned by multiple calls to
	// this method, programs must treat the returned values as immutable.
	//
	// This method returns an empty mapping when called on leaf nodes.
	Fields() []Field

	// Returns the encoding used by the node.
	//
	// The method may return nil to indicate that no specific encoding was
	// configured on the node, in which case a default encoding might be used.
	Encoding() encoding.Encoding

	// Returns compression codec used by the node.
	//
	// The method may return nil to indicate that no specific compression codec
	// was configured on the node, in which case a default compression might be
	// used.
	Compression() compress.Codec

	// Returns the Go type that best represents the parquet node.
	//
	// For leaf nodes, this will be one of bool, int32, int64, deprecated.Int96,
	// float32, float64, string, []byte, or [N]byte.
	//
	// For groups, the method returns a struct type.
	//
	// If the method is called on a repeated node, the method returns a slice of
	// the underlying type.
	//
	// For optional nodes, the method returns a pointer of the underlying type.
	//
	// For nodes that were constructed from Go values (e.g. using SchemaOf), the
	// method returns the original Go type.
	GoType() reflect.Type
}

// Field instances represent fields of a parquet node, which associate a node to
// their name in their parent node.
type Field interface {
	Node

	// Returns the name of this field in its parent node.
	Name() string

	// Given a reference to the Go value matching the structure of the parent
	// node, returns the Go value of the field.
	Value(base reflect.Value) reflect.Value
}

// Encoded wraps the node passed as argument to use the given encoding.
//
// The function panics if it is called on a non-leaf node, or if the
// encoding does not support the node type.
func Encoded(node Node, encoding encoding.Encoding) Node {
	if !node.Leaf() {
		panic("cannot add encoding to a non-leaf node")
	}
	if encoding != nil {
		kind := node.Type().Kind()
		if !canEncode(encoding, kind) {
			panic("cannot apply " + encoding.Encoding().String() + " to node of type " + kind.String())
		}
	}
	return &encodedNode{
		Node:     node,
		encoding: encoding,
	}
}

// encodedNode embeds the wrapped Node and overrides only its Encoding method.
type encodedNode struct {
	Node
	encoding encoding.Encoding
}

func (n *encodedNode) Encoding() encoding.Encoding { return n.encoding }

// Compressed wraps the node passed as argument to use the given compression
// codec.
//
// If the codec is nil, the node's compression is left unchanged.
//
// The function panics if it is called on a non-leaf node.
func Compressed(node Node, codec compress.Codec) Node {
	if !node.Leaf() {
		panic("cannot add compression codec to a non-leaf node")
	}
	return &compressedNode{
		Node:  node,
		codec: codec,
	}
}

// compressedNode embeds the wrapped Node and overrides only its Compression
// method.
type compressedNode struct {
	Node
	codec compress.Codec
}

func (n *compressedNode) Compression() compress.Codec { return n.codec }

// Optional wraps the given node to make it optional.
func Optional(node Node) Node { return &optionalNode{node} }

// optionalNode overrides the repetition methods of the wrapped node.
type optionalNode struct{ Node }

func (opt *optionalNode) Optional() bool { return true }
func (opt *optionalNode) Repeated() bool { return false }
func (opt *optionalNode) Required() bool { return false }

// Optional values are represented as a pointer to the underlying Go type.
func (opt *optionalNode) GoType() reflect.Type { return reflect.PtrTo(opt.Node.GoType()) }

// Repeated wraps the given node to make it repeated.
func Repeated(node Node) Node { return &repeatedNode{node} }

// repeatedNode overrides the repetition methods of the wrapped node.
type repeatedNode struct{ Node }

func (rep *repeatedNode) Optional() bool { return false }
func (rep *repeatedNode) Repeated() bool { return true }
func (rep *repeatedNode) Required() bool { return false }

// Repeated values are represented as a slice of the underlying Go type.
func (rep *repeatedNode) GoType() reflect.Type { return reflect.SliceOf(rep.Node.GoType()) }

// Required wraps the given node to make it required.
func Required(node Node) Node { return &requiredNode{node} }

// requiredNode overrides the repetition methods of the wrapped node.
type requiredNode struct{ Node }

func (req *requiredNode) Optional() bool { return false }
func (req *requiredNode) Repeated() bool { return false }
func (req *requiredNode) Required() bool { return true }

// Required values keep the underlying Go type unchanged.
func (req *requiredNode) GoType() reflect.Type { return req.Node.GoType() }

type node struct{}

// Leaf returns a leaf node of the given type.
func Leaf(typ Type) Node { return &leafNode{typ: typ} }

// leafNode is the canonical leaf implementation of Node: required, with no
// fields, no configured encoding, and no configured compression.
type leafNode struct{ typ Type }

func (n *leafNode) String() string { return sprint("", n) }

func (n *leafNode) Type() Type { return n.typ }

func (n *leafNode) Optional() bool { return false }

func (n *leafNode) Repeated() bool { return false }

func (n *leafNode) Required() bool { return true }

func (n *leafNode) Leaf() bool { return true }

func (n *leafNode) Fields() []Field { return nil }

func (n *leafNode) Encoding() encoding.Encoding { return nil }

func (n *leafNode) Compression() compress.Codec { return nil }

func (n *leafNode) GoType() reflect.Type { return goTypeOfLeaf(n) }

// repetitionTypes is indexed by the format.FieldRepetitionType value so that
// fieldRepetitionTypePtrOf can return stable pointers.
var repetitionTypes = [...]format.FieldRepetitionType{
	0: format.Required,
	1: format.Optional,
	2: format.Repeated,
}

// fieldRepetitionTypePtrOf returns a pointer to the repetition type of node,
// or nil if the node reports none of the three repetitions.
func fieldRepetitionTypePtrOf(node Node) *format.FieldRepetitionType {
	switch {
	case node.Required():
		return &repetitionTypes[format.Required]
	case node.Optional():
		return &repetitionTypes[format.Optional]
	case node.Repeated():
		return &repetitionTypes[format.Repeated]
	default:
		return nil
	}
}

// fieldRepetitionTypeOf returns the repetition type of node, defaulting to
// format.Required.
func fieldRepetitionTypeOf(node Node) format.FieldRepetitionType {
	switch {
	case node.Optional():
		return format.Optional
	case node.Repeated():
		return format.Repeated
	default:
		return format.Required
	}
}

// applyFieldRepetitionType increments the repetition/definition levels
// according to the repetition type of a node: optional nodes add a definition
// level, repeated nodes add both.
func applyFieldRepetitionType(t format.FieldRepetitionType, repetitionLevel, definitionLevel byte) (byte, byte) {
	switch t {
	case format.Optional:
		definitionLevel++
	case format.Repeated:
		repetitionLevel++
		definitionLevel++
	}
	return repetitionLevel, definitionLevel
}

type Group map[string]Node

func (g Group) String() string { return sprint("", g) }

func (g Group) Type() Type { return groupType{} }

func (g Group) Optional() bool { return false }

func (g Group) Repeated() bool { return false }

func (g Group) Required() bool { return true }

func (g Group) Leaf() bool { return false }

// Fields returns the group's fields sorted by name, so the mapping iteration
// order of the underlying map does not leak into the schema.
func (g Group) Fields() []Field {
	groupFields := make([]groupField, 0, len(g))
	for name, node := range g {
		groupFields = append(groupFields, groupField{
			Node: node,
			name: name,
		})
	}
	sort.Slice(groupFields, func(i, j int) bool {
		return groupFields[i].name < groupFields[j].name
	})
	fields := make([]Field, len(groupFields))
	for i := range groupFields {
		fields[i] = &groupFields[i]
	}
	return fields
}

func (g Group) Encoding() encoding.Encoding { return nil }

func (g Group) Compression() compress.Codec { return nil }

func (g Group) GoType() reflect.Type { return goTypeOfGroup(g) }

// groupField associates a node of a Group with its name in the group.
type groupField struct {
	Node
	name string
}

func (f *groupField) Name() string { return f.name }

// Value looks up the field by name in the map value backing the group.
func (f *groupField) Value(base reflect.Value) reflect.Value {
	return base.MapIndex(reflect.ValueOf(&f.name).Elem())
}

// goTypeOf returns the Go type representing node, accounting for its
// repetition (pointer for optional, slice for repeated).
func goTypeOf(node Node) reflect.Type {
	switch {
	case node.Optional():
		return goTypeOfOptional(node)
	case node.Repeated():
		return goTypeOfRepeated(node)
	default:
		return goTypeOfRequired(node)
	}
}

func goTypeOfOptional(node Node) reflect.Type {
	return reflect.PtrTo(goTypeOfRequired(node))
}

func goTypeOfRepeated(node Node) reflect.Type {
	return reflect.SliceOf(goTypeOfRequired(node))
}

func goTypeOfRequired(node Node) reflect.Type {
	if node.Leaf() {
		return goTypeOfLeaf(node)
	} else {
		return goTypeOfGroup(node)
	}
}

// goTypeOfLeaf maps a leaf node's parquet kind to the matching Go type; types
// that implement GoType() take precedence.
func goTypeOfLeaf(node Node) reflect.Type {
	t := node.Type()
	if convertibleType, ok := t.(interface{ GoType() reflect.Type }); ok {
		return convertibleType.GoType()
	}
	switch t.Kind() {
	case Boolean:
		return reflect.TypeOf(false)
	case Int32:
		return reflect.TypeOf(int32(0))
	case Int64:
		return reflect.TypeOf(int64(0))
	case Int96:
		return reflect.TypeOf(deprecated.Int96{})
	case Float:
		return reflect.TypeOf(float32(0))
	case Double:
		return reflect.TypeOf(float64(0))
	case ByteArray:
		return reflect.TypeOf(([]byte)(nil))
	case FixedLenByteArray:
		return reflect.ArrayOf(t.Length(), reflect.TypeOf(byte(0)))
	default:
		panic("BUG: parquet type returned an unsupported kind")
	}
}

// goTypeOfGroup builds a struct type with one exported field per group field.
func goTypeOfGroup(node Node) reflect.Type {
	fields := node.Fields()
	structFields := make([]reflect.StructField, len(fields))
	for i, field := range fields {
		structFields[i].Name = exportedStructFieldName(field.Name())
		structFields[i].Type = field.GoType()
		// TODO: can we reconstruct a struct tag that would be valid if a value
		// of this type were passed to SchemaOf?
	}
	return reflect.StructOf(structFields)
}

// exportedStructFieldName upper-cases the first rune of name so the resulting
// struct field is exported.
func exportedStructFieldName(name string) string {
	firstRune, size := utf8.DecodeRuneInString(name)
	return string([]rune{unicode.ToUpper(firstRune)}) + name[size:]
}

// isList reports whether the node carries the LIST logical type.
func isList(node Node) bool {
	logicalType := node.Type().LogicalType()
	return logicalType != nil && logicalType.List != nil
}

// isMap reports whether the node carries the MAP logical type.
func isMap(node Node) bool {
	logicalType := node.Type().LogicalType()
	return logicalType != nil && logicalType.Map != nil
}

func numLeafColumnsOf(node Node) int16 {
	return makeColumnIndex(numLeafColumns(node, 0))
}

// numLeafColumns counts the leaf columns of node, threading the running
// column index through the recursion.
func numLeafColumns(node Node, columnIndex int) int {
	if node.Leaf() {
		return columnIndex + 1
	}
	for _, field := range node.Fields() {
		columnIndex = numLeafColumns(field, columnIndex)
	}
	return columnIndex
}

// listElementOf returns the .list.element node of a LIST-typed group, or
// panics if the expected structure is missing.
func listElementOf(node Node) Node {
	if !node.Leaf() {
		if list := fieldByName(node, "list"); list != nil {
			if elem := fieldByName(list, "element"); elem != nil {
				return elem
			}
		}
	}
	panic("node with logical type LIST is not composed of a repeated .list.element")
}

// mapKeyValueOf returns the repeated .key_value group of a MAP-typed group,
// validating that it has a required key and a value field; panics otherwise.
func mapKeyValueOf(node Node) Node {
	if !node.Leaf() && (node.Required() || node.Optional()) {
		if keyValue := fieldByName(node, "key_value"); keyValue != nil && !keyValue.Leaf() && keyValue.Repeated() {
			k := fieldByName(keyValue, "key")
			v := fieldByName(keyValue, "value")
			if k != nil && v != nil && k.Required() {
				return keyValue
			}
		}
	}
	panic("node with logical type MAP is not composed of a repeated .key_value group with key and value fields")
}

// encodingOf returns the encoding configured on node, or a sensible default.
func encodingOf(node Node) encoding.Encoding {
	encoding := node.Encoding()
	// The parquet-format documentation states that the
	// DELTA_LENGTH_BYTE_ARRAY is always preferred to PLAIN when
	// encoding BYTE_ARRAY values. We apply it as a default if
	// none were explicitly specified, which gives the application
	// the opportunity to override this behavior if needed.
	//
	// https://github.com/apache/parquet-format/blob/master/Encodings.md#delta-length-byte-array-delta_length_byte_array--6
	if node.Type().Kind() == ByteArray && encoding == nil {
		encoding = &DeltaLengthByteArray
	}
	if encoding == nil {
		encoding = &Plain
	}
	return encoding
}

// forEachNodeOf invokes do for node and, recursively, for each of its fields.
func forEachNodeOf(name string, node Node, do func(string, Node)) {
	do(name, node)
	for _, f := range node.Fields() {
		forEachNodeOf(f.Name(), f, do)
	}
}

// fieldByName returns the field of node with the given name, or nil.
func fieldByName(node Node, name string) Field {
	for _, f := range node.Fields() {
		if f.Name() == name {
			return f
		}
	}
	return nil
}

// nodesAreEqual reports deep structural equality of two nodes.
func nodesAreEqual(node1, node2 Node) bool {
	if node1.Leaf() {
		return node2.Leaf() && leafNodesAreEqual(node1, node2)
	} else {
		return !node2.Leaf() && groupNodesAreEqual(node1, node2)
	}
}

func typesAreEqual(type1, type2 Type) bool {
	return type1.Kind() == type2.Kind() &&
		type1.Length() == type2.Length() &&
		reflect.DeepEqual(type1.LogicalType(), type2.LogicalType())
}

func repetitionsAreEqual(node1, node2 Node) bool {
	return node1.Optional() == node2.Optional() && node1.Repeated() == node2.Repeated()
}

func leafNodesAreEqual(node1, node2 Node) bool {
	return typesAreEqual(node1.Type(), node2.Type()) && repetitionsAreEqual(node1, node2)
}

// groupNodesAreEqual compares two groups field by field: same repetition,
// same field names in order, and recursively equal field nodes.
func groupNodesAreEqual(node1, node2 Node) bool {
	fields1 := node1.Fields()
	fields2 := node2.Fields()
	if len(fields1) != len(fields2) {
		return false
	}
	if !repetitionsAreEqual(node1, node2) {
		return false
	}
	for i := range fields1 {
		f1 := fields1[i]
		f2 := fields2[i]
		if f1.Name() != f2.Name() {
			return false
		}
		if !nodesAreEqual(f1, f2) {
			return false
		}
	}
	return true
}

================================================
FILE: null.go
================================================
//go:build go1.18

package parquet

import (
	"reflect"
	"unsafe"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/internal/bytealg"
	"github.com/segmentio/parquet-go/internal/unsafecast"
	"github.com/segmentio/parquet-go/sparse"
)

// nullIndexFunc is the type of functions used to detect null values in rows.
//
// For each value of the rows array, the bitmap passed as first argument is
// populated to indicate whether the values were null (0) or not (1).
//
// The function writes one bit to the output buffer for each row in the input,
// the buffer must be sized accordingly.
type nullIndexFunc func(bits []uint64, rows sparse.Array)

// nullIndex sets bit i for every row whose value differs from the zero value
// of T; zero-valued rows leave their bit at 0 (null).
func nullIndex[T comparable](bits []uint64, rows sparse.Array) {
	var zero T
	for i := 0; i < rows.Len(); i++ {
		v := *(*T)(rows.Index(i))
		if v != zero {
			x := uint(i) / 64
			y := uint(i) % 64
			bits[x] |= 1 << y
		}
	}
}

// nullIndexStruct marks every row non-null by broadcasting 0xFF over the
// bitmap; struct values are never considered null here.
func nullIndexStruct(bits []uint64, rows sparse.Array) {
	bytealg.Broadcast(unsafecast.Slice[byte](bits), 0xFF)
}

// nullIndexFuncOf selects the null-detection function matching the Go type t,
// panicking for unsupported types.
func nullIndexFuncOf(t reflect.Type) nullIndexFunc {
	switch t {
	case reflect.TypeOf(deprecated.Int96{}):
		return nullIndex[deprecated.Int96]
	}

	switch t.Kind() {
	case reflect.Bool:
		return nullIndexBool

	case reflect.Int:
		return nullIndexInt

	case reflect.Int32:
		return nullIndexInt32

	case reflect.Int64:
		return nullIndexInt64

	case reflect.Uint:
		return nullIndexUint

	case reflect.Uint32:
		return nullIndexUint32

	case reflect.Uint64:
		return nullIndexUint64

	case reflect.Float32:
		return nullIndexFloat32

	case reflect.Float64:
		return nullIndexFloat64

	case reflect.String:
		return nullIndexString

	case reflect.Slice:
		return nullIndexSlice

	case reflect.Map:
		return nullIndexPointer

	case reflect.Array:
		if t.Elem().Kind() == reflect.Uint8 {
			switch size := t.Len(); size {
			case 16:
				return nullIndexUint128
			default:
				return nullIndexFuncOfByteArray(size)
			}
		}

	case reflect.Pointer:
		return nullIndexPointer

	case reflect.Struct:
		return nullIndexStruct
	}

	panic("cannot convert Go values of type " + typeNameOf(t) + " to parquet value")
}

// nullIndexFuncOfByteArray returns a null-detection function for [n]byte
// arrays: a row is null when all n bytes are zero.
func nullIndexFuncOfByteArray(n int) nullIndexFunc {
	return func(bits []uint64, rows sparse.Array) {
		for i := 0; i < rows.Len(); i++ {
			p := (*byte)(rows.Index(i))
			b := unsafe.Slice(p, n)
			if !isZero(b) {
				x := uint(i) / 64
				y := uint(i) % 64
				bits[x] |= 1 << y
			}
		}
	}
}

================================================
FILE: null_amd64.go
================================================
//go:build go1.18 && !purego

package parquet

import "github.com/segmentio/parquet-go/sparse"

//go:noescape
func nullIndex8(bits *uint64, rows sparse.Array)

//go:noescape
func nullIndex32(bits *uint64, rows sparse.Array)

//go:noescape
func nullIndex64(bits *uint64, rows sparse.Array)

//go:noescape
func nullIndex128(bits *uint64, rows sparse.Array)

func nullIndexBool(bits []uint64, rows sparse.Array) {
	nullIndex8(&bits[0], rows)
}

func nullIndexInt(bits []uint64, rows sparse.Array) {
	nullIndex64(&bits[0], rows)
}

func nullIndexInt32(bits []uint64, rows sparse.Array) {
	nullIndex32(&bits[0], rows)
}

func nullIndexInt64(bits []uint64, rows sparse.Array) {
	nullIndex64(&bits[0], rows)
}

func nullIndexUint(bits []uint64, rows sparse.Array) {
	nullIndex64(&bits[0], rows)
}

func nullIndexUint32(bits []uint64, rows sparse.Array) {
	nullIndex32(&bits[0], rows)
}

func nullIndexUint64(bits []uint64, rows sparse.Array) {
	nullIndex64(&bits[0], rows)
}

func nullIndexUint128(bits []uint64, rows sparse.Array) {
	nullIndex128(&bits[0], rows)
}

func nullIndexFloat32(bits []uint64, rows sparse.Array) {
	nullIndex32(&bits[0], rows)
}

func nullIndexFloat64(bits []uint64, rows sparse.Array) {
	nullIndex64(&bits[0], rows)
}

func nullIndexString(bits []uint64, rows sparse.Array) {
	// We offset by an extra 8 bytes to test the lengths of string values where
	// the first field is the pointer and the second is the length which we want
	// to test.
	nullIndex64(&bits[0], rows.Offset(8))
}

func nullIndexSlice(bits []uint64, rows sparse.Array) {
	// Slice values are null if their pointer is nil, which is held in the first
	// 8 bytes of the object so we can simply test 64 bits words.
	nullIndex64(&bits[0], rows)
}

func nullIndexPointer(bits []uint64, rows sparse.Array) {
	nullIndex64(&bits[0], rows)
}

================================================
FILE: null_amd64.s
================================================
//go:build !purego

#include "textflag.h"

// func nullIndex8(bits *uint64, rows sparse.Array)
//
// Scalar loop: sets bit SI of the bitmap when the byte at the current row is
// non-zero; CX holds the rotating single-bit mask.
TEXT ·nullIndex8(SB), NOSPLIT, $0-32
	MOVQ bits+0(FP), AX
	MOVQ rows_array_ptr+8(FP), BX
	MOVQ rows_array_len+16(FP), DI
	MOVQ rows_array_off+24(FP), DX
	MOVQ $1, CX
	XORQ SI, SI
	CMPQ DI, $0
	JE done
loop1x1:
	XORQ R8, R8
	MOVB (BX), R9
	CMPB R9, $0
	JE next1x1
	MOVQ SI, R10
	SHRQ $6, R10
	ORQ CX, (AX)(R10*8)
next1x1:
	ADDQ DX, BX
	ROLQ $1, CX
	INCQ SI
	CMPQ SI, DI
	JNE loop1x1
done:
	RET

// func nullIndex32(bits *uint64, rows sparse.Array)
//
// Uses an AVX2 gather to test 8 x 32-bit values per iteration when available,
// then falls back to a scalar loop for the remainder.
TEXT ·nullIndex32(SB), NOSPLIT, $0-32
	MOVQ bits+0(FP), AX
	MOVQ rows_array_ptr+8(FP), BX
	MOVQ rows_array_len+16(FP), DI
	MOVQ rows_array_off+24(FP), DX
	MOVQ $1, CX
	XORQ SI, SI

	CMPQ DI, $0
	JE done

	CMPQ DI, $8
	JB loop1x4

	CMPB ·hasAVX2(SB), $0
	JE loop1x4

	MOVQ DI, R8
	SHRQ $3, R8
	SHLQ $3, R8

	VPBROADCASTD rows_array_off+24(FP), Y0
	VPMULLD ·range0n8(SB), Y0, Y0
	VPCMPEQD Y1, Y1, Y1
	VPCMPEQD Y2, Y2, Y2
	VPXOR Y3, Y3, Y3
loop8x4:
	VPGATHERDD Y1, (BX)(Y0*1), Y4
	VPCMPEQD Y3, Y4, Y4
	VMOVMSKPS Y4, R9
	VMOVDQU Y2, Y1

	NOTQ R9
	ANDQ $0b11111111, R9
	MOVQ SI, CX
	ANDQ $0b111111, CX
	MOVQ SI, R10
	SHRQ $6, R10
	SHLQ CX, R9
	ORQ R9, (AX)(R10*8)

	LEAQ (BX)(DX*8), BX
	ADDQ $8, SI
	CMPQ SI, R8
	JNE loop8x4
	VZEROUPPER

	CMPQ SI, DI
	JE done
	MOVQ $1, R8
	MOVQ SI, CX
	ANDQ $0b111111, R8
	SHLQ CX, R8
	MOVQ R8, CX
loop1x4:
	MOVL (BX), R8
	CMPL R8, $0
	JE next1x4
	MOVQ SI, R9
	SHRQ $6, R9
	ORQ CX, (AX)(R9*8)
next1x4:
	ADDQ DX, BX
	ROLQ $1, CX
	INCQ SI
	CMPQ SI, DI
	JNE loop1x4
done:
	RET

// func nullIndex64(bits *uint64, rows sparse.Array)
//
// Same structure as nullIndex32, but gathers 4 x 64-bit values per iteration.
TEXT ·nullIndex64(SB), NOSPLIT, $0-32
	MOVQ bits+0(FP), AX
	MOVQ rows_array_ptr+8(FP), BX
	MOVQ rows_array_len+16(FP), DI
	MOVQ rows_array_off+24(FP), DX
	MOVQ $1, CX
	XORQ SI, SI

	CMPQ DI, $0
	JE done

	CMPQ DI, $4
	JB loop1x8

	CMPB ·hasAVX2(SB), $0
	JE loop1x8

	MOVQ DI, R8
	SHRQ $2, R8
	SHLQ $2, R8

	VPBROADCASTQ rows_array_off+24(FP), Y0
	VPMULLD scale4x8<>(SB), Y0, Y0
	VPCMPEQQ Y1, Y1, Y1
	VPCMPEQQ Y2, Y2, Y2
	VPXOR Y3, Y3, Y3
loop4x8:
	VPGATHERQQ Y1, (BX)(Y0*1), Y4
	VPCMPEQQ Y3, Y4, Y4
	VMOVMSKPD Y4, R9
	VMOVDQU Y2, Y1

	NOTQ R9
	ANDQ $0b1111, R9
	MOVQ SI, CX
	ANDQ $0b111111, CX
	MOVQ SI, R10
	SHRQ $6, R10
	SHLQ CX, R9
	ORQ R9, (AX)(R10*8)

	LEAQ (BX)(DX*4), BX
	ADDQ $4, SI
	CMPQ SI, R8
	JNE loop4x8
	VZEROUPPER

	CMPQ SI, DI
	JE done
	MOVQ $1, R8
	MOVQ SI, CX
	ANDQ $0b111111, R8
	SHLQ CX, R8
	MOVQ R8, CX
loop1x8:
	MOVQ (BX), R8
	CMPQ R8, $0
	JE next1x8
	MOVQ SI, R9
	SHRQ $6, R9
	ORQ CX, (AX)(R9*8)
next1x8:
	ADDQ DX, BX
	ROLQ $1, CX
	INCQ SI
	CMPQ SI, DI
	JNE loop1x8
done:
	RET

GLOBL scale4x8<>(SB), RODATA|NOPTR, $32
DATA scale4x8<>+0(SB)/8, $0
DATA scale4x8<>+8(SB)/8, $1
DATA scale4x8<>+16(SB)/8, $2
DATA scale4x8<>+24(SB)/8, $3

// func nullIndex128(bits *uint64, rows sparse.Array)
//
// SSE loop testing 16-byte values: a row is null when both 64-bit halves
// compare equal to zero.
TEXT ·nullIndex128(SB), NOSPLIT, $0-32
	MOVQ bits+0(FP), AX
	MOVQ rows_array_ptr+8(FP), BX
	MOVQ rows_array_len+16(FP), DI
	MOVQ rows_array_off+24(FP), DX
	CMPQ DI, $0
	JE done
	MOVQ $1, CX
	XORQ SI, SI
	PXOR X0, X0
loop1x16:
	MOVOU (BX), X1
	PCMPEQQ X0, X1
	MOVMSKPD X1, R8
	CMPB R8, $0b11
	JE next1x16
	MOVQ SI, R9
	SHRQ $6, R9
	ORQ CX, (AX)(R9*8)
next1x16:
	ADDQ DX, BX
	ROLQ $1, CX
	INCQ SI
	CMPQ SI, DI
	JNE loop1x16
done:
	RET

================================================
FILE: null_purego.go
================================================
//go:build go1.18 && (purego || !amd64)

package parquet

import "github.com/segmentio/parquet-go/sparse"

func nullIndexBool(bits []uint64, rows sparse.Array) {
	nullIndex[bool](bits, rows)
}

func nullIndexInt(bits []uint64, rows sparse.Array) {
	nullIndex[int](bits, rows)
}

func nullIndexInt32(bits []uint64, rows sparse.Array) {
	nullIndex[int32](bits, rows)
}

func nullIndexInt64(bits []uint64, rows sparse.Array) {
	nullIndex[int64](bits, rows)
}

func nullIndexUint(bits []uint64, rows sparse.Array) {
	nullIndex[uint](bits, rows)
}

func nullIndexUint32(bits []uint64, rows
sparse.Array) {
	nullIndex[uint32](bits, rows)
}

func nullIndexUint64(bits []uint64, rows sparse.Array) {
	nullIndex[uint64](bits, rows)
}

func nullIndexUint128(bits []uint64, rows sparse.Array) {
	nullIndex[[16]byte](bits, rows)
}

func nullIndexFloat32(bits []uint64, rows sparse.Array) {
	nullIndex[float32](bits, rows)
}

func nullIndexFloat64(bits []uint64, rows sparse.Array) {
	nullIndex[float64](bits, rows)
}

func nullIndexString(bits []uint64, rows sparse.Array) {
	nullIndex[string](bits, rows)
}

// nullIndexSlice tests only the slice's data pointer (its first word): a nil
// pointer means null.
func nullIndexSlice(bits []uint64, rows sparse.Array) {
	for i := 0; i < rows.Len(); i++ {
		p := *(**struct{})(rows.Index(i))
		b := uint64(0)
		if p != nil {
			b = 1
		}
		bits[uint(i)/64] |= b << (uint(i) % 64)
	}
}

func nullIndexPointer(bits []uint64, rows sparse.Array) {
	nullIndex[*struct{}](bits, rows)
}

================================================
FILE: null_test.go
================================================
//go:build go1.18

package parquet

import (
	"reflect"
	"testing"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/internal/quick"
)

func TestNullIndex(t *testing.T) {
	testNullIndex[bool](t)
	testNullIndex[int](t)
	testNullIndex[int32](t)
	testNullIndex[int64](t)
	testNullIndex[uint](t)
	testNullIndex[uint32](t)
	testNullIndex[uint64](t)
	testNullIndex[float32](t)
	testNullIndex[float64](t)
	testNullIndex[[10]byte](t)
	testNullIndex[[16]byte](t)
	testNullIndex[deprecated.Int96](t)
	testNullIndex[string](t)
	testNullIndex[*struct{}](t)
}

// testNullIndex checks that the type-dispatched function returned by
// nullIndexFuncOf produces the same bitmap as the generic reference
// implementation, zeroing every other element to exercise both outcomes.
func testNullIndex[T comparable](t *testing.T) {
	var zero T
	t.Helper()
	t.Run(reflect.TypeOf(zero).String(), func(t *testing.T) {
		err := quick.Check(func(data []T) bool {
			if len(data) == 0 {
				return true
			}

			want := make([]uint64, (len(data)+63)/64)
			got := make([]uint64, (len(data)+63)/64)

			for i := range data {
				if (i % 2) == 0 {
					data[i] = zero
				}
			}

			array := makeArrayOf(data)
			nullIndex[T](want, array)
			nullIndexFuncOf(reflect.TypeOf(zero))(got, array)

			if !reflect.DeepEqual(want, got) {
				t.Errorf("unexpected null index\nwant = %064b\ngot  = %064b", want, got)
				return false
			}
			return true
		})
		if err != nil {
			t.Error(err)
		}
	})
}

func BenchmarkNullIndex(b *testing.B) {
	benchmarkNullIndex[bool](b)
	benchmarkNullIndex[int](b)
	benchmarkNullIndex[int32](b)
	benchmarkNullIndex[int64](b)
	benchmarkNullIndex[uint](b)
	benchmarkNullIndex[uint32](b)
	benchmarkNullIndex[uint64](b)
	benchmarkNullIndex[float32](b)
	benchmarkNullIndex[float64](b)
	benchmarkNullIndex[[10]byte](b)
	benchmarkNullIndex[[16]byte](b)
	benchmarkNullIndex[deprecated.Int96](b)
	benchmarkNullIndex[string](b)
	benchmarkNullIndex[[]struct{}](b)
	benchmarkNullIndex[*struct{}](b)
}

func benchmarkNullIndex[T any](b *testing.B) {
	const N = 1000
	var zero T
	typ := reflect.TypeOf(zero)
	null := nullIndexFuncOf(typ)
	data := makeArrayOf(make([]T, N))
	bits := make([]uint64, (N+63)/64)
	b.Run(typ.String(), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			null(bits, data)
		}
		b.SetBytes(int64(typ.Size() * N))
	})
}

================================================
FILE: offset_index.go
================================================
package parquet

import (
	"github.com/segmentio/parquet-go/format"
)

type OffsetIndex interface {
	// NumPages returns the number of pages in the offset index.
	NumPages() int

	// Offset returns the offset starting from the beginning of the file for the
	// page at the given index.
	Offset(int) int64

	// CompressedPageSize returns the size of the page at the given index
	// (in bytes).
	CompressedPageSize(int) int64

	// FirstRowIndex returns the first row in the page at the given index.
	//
	// The returned row index is based on the row group that the page belongs
	// to, the first row has index zero.
	FirstRowIndex(int) int64
}

// fileOffsetIndex adapts the format.OffsetIndex read from a file footer.
type fileOffsetIndex format.OffsetIndex

func (i *fileOffsetIndex) NumPages() int { return len(i.PageLocations) }

func (i *fileOffsetIndex) Offset(j int) int64 { return i.PageLocations[j].Offset }

func (i *fileOffsetIndex) CompressedPageSize(j int) int64 {
	return int64(i.PageLocations[j].CompressedPageSize)
}

func (i *fileOffsetIndex) FirstRowIndex(j int) int64 { return i.PageLocations[j].FirstRowIndex }

// emptyOffsetIndex is the zero-page placeholder offset index.
type emptyOffsetIndex struct{}

func (emptyOffsetIndex) NumPages() int { return 0 }

func (emptyOffsetIndex) Offset(int) int64 { return 0 }

func (emptyOffsetIndex) CompressedPageSize(int) int64 { return 0 }

func (emptyOffsetIndex) FirstRowIndex(int) int64 { return 0 }

// The per-type offset indexes below all describe a single in-memory page:
// one page, no file offset, size taken from the page itself, first row zero.

type booleanOffsetIndex struct{ page *booleanPage }

func (i booleanOffsetIndex) NumPages() int { return 1 }

func (i booleanOffsetIndex) Offset(int) int64 { return 0 }

func (i booleanOffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i booleanOffsetIndex) FirstRowIndex(int) int64 { return 0 }

type int32OffsetIndex struct{ page *int32Page }

func (i int32OffsetIndex) NumPages() int { return 1 }

func (i int32OffsetIndex) Offset(int) int64 { return 0 }

func (i int32OffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i int32OffsetIndex) FirstRowIndex(int) int64 { return 0 }

type int64OffsetIndex struct{ page *int64Page }

func (i int64OffsetIndex) NumPages() int { return 1 }

func (i int64OffsetIndex) Offset(int) int64 { return 0 }

func (i int64OffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i int64OffsetIndex) FirstRowIndex(int) int64 { return 0 }

type int96OffsetIndex struct{ page *int96Page }

func (i int96OffsetIndex) NumPages() int { return 1 }

func (i int96OffsetIndex) Offset(int) int64 { return 0 }

func (i int96OffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i int96OffsetIndex) FirstRowIndex(int) int64 { return 0 }

type floatOffsetIndex struct{ page *floatPage }

func (i floatOffsetIndex) NumPages() int { return 1 }

func (i floatOffsetIndex) Offset(int) int64 { return 0 }

func (i floatOffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i floatOffsetIndex) FirstRowIndex(int) int64 { return 0 }

type doubleOffsetIndex struct{ page *doublePage }

func (i doubleOffsetIndex) NumPages() int { return 1 }

func (i doubleOffsetIndex) Offset(int) int64 { return 0 }

func (i doubleOffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i doubleOffsetIndex) FirstRowIndex(int) int64 { return 0 }

type byteArrayOffsetIndex struct{ page *byteArrayPage }

func (i byteArrayOffsetIndex) NumPages() int { return 1 }

func (i byteArrayOffsetIndex) Offset(int) int64 { return 0 }

func (i byteArrayOffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i byteArrayOffsetIndex) FirstRowIndex(int) int64 { return 0 }

type fixedLenByteArrayOffsetIndex struct{ page *fixedLenByteArrayPage }

func (i fixedLenByteArrayOffsetIndex) NumPages() int { return 1 }

func (i fixedLenByteArrayOffsetIndex) Offset(int) int64 { return 0 }

func (i fixedLenByteArrayOffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i fixedLenByteArrayOffsetIndex) FirstRowIndex(int) int64 { return 0 }

type uint32OffsetIndex struct{ page *uint32Page }

func (i uint32OffsetIndex) NumPages() int { return 1 }

func (i uint32OffsetIndex) Offset(int) int64 { return 0 }

func (i uint32OffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i uint32OffsetIndex) FirstRowIndex(int) int64 { return 0 }

type uint64OffsetIndex struct{ page *uint64Page }

func (i uint64OffsetIndex) NumPages() int { return 1 }

func (i uint64OffsetIndex) Offset(int) int64 { return 0 }

func (i uint64OffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() }

func (i uint64OffsetIndex) FirstRowIndex(int) int64 { return 0 }

type be128OffsetIndex struct{ page *be128Page }

func (i be128OffsetIndex) NumPages() int { return 1 }

func (i
be128OffsetIndex) Offset(int) int64 { return 0 } func (i be128OffsetIndex) CompressedPageSize(int) int64 { return i.page.Size() } func (i be128OffsetIndex) FirstRowIndex(int) int64 { return 0 } ================================================ FILE: order.go ================================================ package parquet import ( "bytes" "github.com/segmentio/parquet-go/internal/unsafecast" ) func orderOfBool(data []bool) int { switch len(data) { case 0, 1: return 0 default: k := 0 i := 0 if data[0] { // true => false: descending k = -1 i = streakOfTrue(data) if i == len(data) { k = +1 } else { i += streakOfFalse(data[i:]) } } else { // false => true: ascending k = +1 i = streakOfFalse(data) i += streakOfTrue(data[i:]) } if i != len(data) { k = 0 } return k } } func streakOfTrue(data []bool) int { if i := bytes.IndexByte(unsafecast.BoolToBytes(data), 0); i >= 0 { return i } return len(data) } func streakOfFalse(data []bool) int { if i := bytes.IndexByte(unsafecast.BoolToBytes(data), 1); i >= 0 { return i } return len(data) } func orderOfBytes(data [][]byte) int { switch len(data) { case 0, 1: return 0 } data = skipBytesStreak(data) if len(data) < 2 { return 1 } ordering := bytes.Compare(data[0], data[1]) switch { case ordering < 0: if bytesAreInAscendingOrder(data[1:]) { return +1 } case ordering > 0: if bytesAreInDescendingOrder(data[1:]) { return -1 } } return 0 } func skipBytesStreak(data [][]byte) [][]byte { for i := 1; i < len(data); i++ { if !bytes.Equal(data[i], data[0]) { return data[i-1:] } } return data[len(data)-1:] } func bytesAreInAscendingOrder(data [][]byte) bool { for i := len(data) - 1; i > 0; i-- { k := bytes.Compare(data[i-1], data[i]) if k > 0 { return false } } return true } func bytesAreInDescendingOrder(data [][]byte) bool { for i := len(data) - 1; i > 0; i-- { k := bytes.Compare(data[i-1], data[i]) if k < 0 { return false } } return true } ================================================ FILE: order_amd64.go 
================================================
//go:build !purego

package parquet

//go:noescape
func orderOfInt32(data []int32) int

//go:noescape
func orderOfInt64(data []int64) int

//go:noescape
func orderOfUint32(data []uint32) int

//go:noescape
func orderOfUint64(data []uint64) int

//go:noescape
func orderOfFloat32(data []float32) int

//go:noescape
func orderOfFloat64(data []float64) int

================================================
FILE: order_amd64.s
================================================
//go:build !purego

#include "textflag.h"

// Return values shared by all orderOf* routines.
#define UNDEFINED 0
#define ASCENDING 1
#define DESCENDING -1

// Permutation indexes used to shift a vector of 32-bit lanes left by one,
// duplicating the last lane.
DATA shift1x32<>+0(SB)/4, $1
DATA shift1x32<>+4(SB)/4, $2
DATA shift1x32<>+8(SB)/4, $3
DATA shift1x32<>+12(SB)/4, $4
DATA shift1x32<>+16(SB)/4, $5
DATA shift1x32<>+20(SB)/4, $6
DATA shift1x32<>+24(SB)/4, $7
DATA shift1x32<>+28(SB)/4, $8
DATA shift1x32<>+32(SB)/4, $9
DATA shift1x32<>+36(SB)/4, $10
DATA shift1x32<>+40(SB)/4, $11
DATA shift1x32<>+44(SB)/4, $12
DATA shift1x32<>+48(SB)/4, $13
DATA shift1x32<>+52(SB)/4, $14
DATA shift1x32<>+56(SB)/4, $15
DATA shift1x32<>+60(SB)/4, $15
GLOBL shift1x32<>(SB), RODATA|NOPTR, $64

// Permutation indexes used to shift a vector of 64-bit lanes left by one,
// duplicating the last lane.
DATA shift1x64<>+0(SB)/4, $1
DATA shift1x64<>+8(SB)/4, $2
DATA shift1x64<>+16(SB)/4, $3
DATA shift1x64<>+24(SB)/4, $4
DATA shift1x64<>+32(SB)/4, $5
DATA shift1x64<>+40(SB)/4, $6
DATA shift1x64<>+48(SB)/4, $7
DATA shift1x64<>+56(SB)/4, $7
GLOBL shift1x64<>(SB), RODATA|NOPTR, $64

// func orderOfInt32(data []int32) int
//
// Uses AVX-512 (when available) to compare 15 adjacent pairs per iteration,
// falling back to a scalar pairwise loop for short inputs and remainders.
TEXT ·orderOfInt32(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	CMPQ R9, $2
	JB undefined

	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $16
	JB test

	XORQ DX, DX
	MOVQ R9, AX
	SHRQ $4, AX
	SHLQ $4, AX
	MOVQ $15, CX
	IDIVQ CX
	IMULQ $15, AX
	DECQ R9

	VMOVDQU32 shift1x32<>(SB), Z2
	KXORW K2, K2, K2
testAscending15:
	VMOVDQU32 (R8)(SI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	VPCMPD $2, Z1, Z0, K1
	KORTESTW K2, K1
	JNC testDescending15
	ADDQ $15, SI
	CMPQ SI, AX
	JNE testAscending15
	VZEROUPPER
	JMP testAscending
testDescending15:
	VMOVDQU32 (R8)(DI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	VPCMPD $5, Z1, Z0, K1
	KORTESTW K2, K1
	JNC undefined15
	ADDQ $15, DI
	CMPQ DI, AX
	JNE testDescending15
	VZEROUPPER
	JMP testDescending
test:
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVL (R8)(SI*4), BX
	MOVL 4(R8)(SI*4), DX
	INCQ SI
	CMPL BX, DX
	JLE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVL (R8)(DI*4), BX
	MOVL 4(R8)(DI*4), DX
	INCQ DI
	CMPL BX, DX
	JGE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined15:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfInt64(data []int64) int
//
// 64-bit signed variant: compares 7 adjacent pairs per AVX-512 iteration.
TEXT ·orderOfInt64(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	CMPQ R9, $2
	JB undefined

	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $8
	JB test

	XORQ DX, DX
	MOVQ R9, AX
	SHRQ $3, AX
	SHLQ $3, AX
	MOVQ $7, CX
	IDIVQ CX
	IMULQ $7, AX
	DECQ R9

	VMOVDQU64 shift1x64<>(SB), Z2
	KXORB K2, K2, K2
testAscending7:
	VMOVDQU64 (R8)(SI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	VPCMPQ $2, Z1, Z0, K1
	KORTESTB K2, K1
	JNC testDescending7
	ADDQ $7, SI
	CMPQ SI, AX
	JNE testAscending7
	VZEROUPPER
	JMP testAscending
testDescending7:
	VMOVDQU64 (R8)(DI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	VPCMPQ $5, Z1, Z0, K1
	KORTESTB K2, K1
	JNC undefined7
	ADDQ $7, DI
	CMPQ DI, AX
	JNE testDescending7
	VZEROUPPER
	JMP testDescending
test:
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVQ (R8)(SI*8), BX
	MOVQ 8(R8)(SI*8), DX
	INCQ SI
	CMPQ BX, DX
	JLE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVQ (R8)(DI*8), BX
	MOVQ 8(R8)(DI*8), DX
	INCQ DI
	CMPQ BX, DX
	JGE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined7:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfUint32(data []uint32) int
//
// Unsigned 32-bit variant: uses VPCMPUD and unsigned branch conditions.
TEXT ·orderOfUint32(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	CMPQ R9, $2
	JB undefined

	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $16
	JB test

	XORQ DX, DX
	MOVQ R9, AX
	SHRQ $4, AX
	SHLQ $4, AX
	MOVQ $15, CX
	IDIVQ CX
	IMULQ $15, AX
	DECQ R9

	VMOVDQU32 shift1x32<>(SB), Z2
	KXORW K2, K2, K2
testAscending15:
	VMOVDQU32 (R8)(SI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	VPCMPUD $2, Z1, Z0, K1
	KORTESTW K2, K1
	JNC testDescending15
	ADDQ $15, SI
	CMPQ SI, AX
	JNE testAscending15
	VZEROUPPER
	JMP testAscending
testDescending15:
	VMOVDQU32 (R8)(DI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	VPCMPUD $5, Z1, Z0, K1
	KORTESTW K2, K1
	JNC undefined15
	ADDQ $15, DI
	CMPQ DI, AX
	JNE testDescending15
	VZEROUPPER
	JMP testDescending
test:
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVL (R8)(SI*4), BX
	MOVL 4(R8)(SI*4), DX
	INCQ SI
	CMPL BX, DX
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVL (R8)(DI*4), BX
	MOVL 4(R8)(DI*4), DX
	INCQ DI
	CMPL BX, DX
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined15:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfUint64(data []uint64) int
//
// Unsigned 64-bit variant: uses VPCMPUQ and unsigned branch conditions.
TEXT ·orderOfUint64(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	CMPQ R9, $2
	JB undefined

	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $8
	JB test

	XORQ DX, DX
	MOVQ R9, AX
	SHRQ $3, AX
	SHLQ $3, AX
	MOVQ $7, CX
	IDIVQ CX
	IMULQ $7, AX
	DECQ R9

	VMOVDQU64 shift1x64<>(SB), Z2
	KXORB K2, K2, K2
testAscending7:
	VMOVDQU64 (R8)(SI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	VPCMPUQ $2, Z1, Z0, K1
	KORTESTB K2, K1
	JNC testDescending7
	ADDQ $7, SI
	CMPQ SI, AX
	JNE testAscending7
	VZEROUPPER
	JMP testAscending
testDescending7:
	VMOVDQU64 (R8)(DI*8), Z0
	VMOVDQU64 Z2, Z1
	VPERMI2Q Z0, Z0, Z1
	VPCMPUQ $5, Z1, Z0, K1
	KORTESTB K2, K1
	JNC undefined7
	ADDQ $7, DI
	CMPQ DI, AX
	JNE testDescending7
	VZEROUPPER
	JMP testDescending
test:
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVQ (R8)(SI*8), BX
	MOVQ 8(R8)(SI*8), DX
	INCQ SI
	CMPQ BX, DX
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVQ (R8)(DI*8), BX
	MOVQ 8(R8)(DI*8), DX
	INCQ DI
	CMPQ BX, DX
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined7:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfFloat32(data []float32) int
//
// Float variant: scalar comparisons go through UCOMISS.
TEXT ·orderOfFloat32(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	CMPQ R9, $2
	JB undefined

	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $16
	JB test

	XORQ DX, DX
	MOVQ R9, AX
	SHRQ $4, AX
	SHLQ $4, AX
	MOVQ $15, CX
	IDIVQ CX
	IMULQ $15, AX
	DECQ R9

	VMOVDQU32 shift1x32<>(SB), Z2
	KXORW K2, K2, K2
testAscending15:
	VMOVDQU32 (R8)(SI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	VCMPPS $2, Z1, Z0, K1
	KORTESTW K2, K1
	JNC testDescending15
	ADDQ $15, SI
	CMPQ SI, AX
	JNE testAscending15
	VZEROUPPER
	JMP testAscending
testDescending15:
	VMOVDQU32 (R8)(DI*4), Z0
	VMOVDQU32 Z2, Z1
	VPERMI2D Z0, Z0, Z1
	VCMPPS $5, Z1, Z0, K1
	KORTESTW K2, K1
	JNC undefined15
	ADDQ $15, DI
	CMPQ DI, AX
	JNE testDescending15
	VZEROUPPER
	JMP testDescending
test:
	DECQ R9
testAscending:
	CMPQ SI, R9
	JAE ascending
	MOVLQZX (R8)(SI*4), BX
	MOVLQZX 4(R8)(SI*4), DX
	INCQ SI
	MOVQ BX, X0
	MOVQ DX, X1
	UCOMISS X1, X0
	JBE testAscending
	JMP testDescending
ascending:
	MOVQ $ASCENDING, ret+24(FP)
	RET
testDescending:
	CMPQ DI, R9
	JAE descending
	MOVLQZX (R8)(DI*4), BX
	MOVLQZX 4(R8)(DI*4), DX
	INCQ DI
	MOVQ BX, X0
	MOVQ DX, X1
	UCOMISS X1, X0
	JAE testDescending
	JMP undefined
descending:
	MOVQ $DESCENDING, ret+24(FP)
	RET
undefined15:
	VZEROUPPER
undefined:
	MOVQ $UNDEFINED, ret+24(FP)
	RET

// func orderOfFloat64(data []float64) int
TEXT ·orderOfFloat64(SB), NOSPLIT, $-32
	MOVQ data_base+0(FP), R8
	MOVQ data_len+8(FP), R9
	XORQ SI, SI
	XORQ DI, DI

	CMPQ R9, $2
	JB undefined

	CMPB ·hasAVX512VL(SB), $0
	JE test

	CMPQ R9, $8
	JB test

	XORQ DX, DX
	MOVQ R9, AX
	SHRQ $3, AX
	SHLQ $3, AX
	MOVQ $7, CX
	IDIVQ CX
	IMULQ $7, AX
	DECQ
R9 VMOVDQU64 shift1x64<>(SB), Z2 KXORB K2, K2, K2 testAscending7: VMOVDQU64 (R8)(SI*8), Z0 VMOVDQU64 Z2, Z1 VPERMI2Q Z0, Z0, Z1 VCMPPD $2, Z1, Z0, K1 KORTESTB K2, K1 JNC testDescending7 ADDQ $7, SI CMPQ SI, AX JNE testAscending7 VZEROUPPER JMP testAscending testDescending7: VMOVDQU64 (R8)(DI*8), Z0 VMOVDQU64 Z2, Z1 VPERMI2Q Z0, Z0, Z1 VCMPPD $5, Z1, Z0, K1 KORTESTB K2, K1 JNC undefined7 ADDQ $7, DI CMPQ DI, AX JNE testDescending7 VZEROUPPER JMP testDescending test: DECQ R9 testAscending: CMPQ SI, R9 JAE ascending MOVQ (R8)(SI*8), BX MOVQ 8(R8)(SI*8), DX INCQ SI MOVQ BX, X0 MOVQ DX, X1 UCOMISD X1, X0 JBE testAscending JMP testDescending ascending: MOVQ $ASCENDING, ret+24(FP) RET testDescending: CMPQ DI, R9 JAE descending MOVQ (R8)(DI*8), BX MOVQ 8(R8)(DI*8), DX INCQ DI MOVQ BX, X0 MOVQ DX, X1 UCOMISD X1, X0 JAE testDescending JMP undefined descending: MOVQ $DESCENDING, ret+24(FP) RET undefined7: VZEROUPPER undefined: MOVQ $UNDEFINED, ret+24(FP) RET ================================================ FILE: order_purego.go ================================================ //go:build purego || !amd64 package parquet // ----------------------------------------------------------------------------- // TODO: use generics versions of the these functions to reduce the amount of // code to maintain when we drop compatilibty with Go version older than 1.18. 
// -----------------------------------------------------------------------------

// orderOfInt32 returns the sort order of data: +1 when the values are in
// ascending order, -1 when they are in descending order, and 0 otherwise
// (including for slices shorter than 2 elements). Runs of equal values
// satisfy both orders; a slice of all-equal values reports +1 because the
// ascending test runs first.
func orderOfInt32(data []int32) int {
	if len(data) > 1 {
		if int32AreInAscendingOrder(data) {
			return +1
		}
		if int32AreInDescendingOrder(data) {
			return -1
		}
	}
	return 0
}

// orderOfInt64 is the int64 variant of orderOfInt32.
func orderOfInt64(data []int64) int {
	if len(data) > 1 {
		if int64AreInAscendingOrder(data) {
			return +1
		}
		if int64AreInDescendingOrder(data) {
			return -1
		}
	}
	return 0
}

// orderOfUint32 is the uint32 variant of orderOfInt32.
func orderOfUint32(data []uint32) int {
	if len(data) > 1 {
		if uint32AreInAscendingOrder(data) {
			return +1
		}
		if uint32AreInDescendingOrder(data) {
			return -1
		}
	}
	return 0
}

// orderOfUint64 is the uint64 variant of orderOfInt32.
func orderOfUint64(data []uint64) int {
	if len(data) > 1 {
		if uint64AreInAscendingOrder(data) {
			return +1
		}
		if uint64AreInDescendingOrder(data) {
			return -1
		}
	}
	return 0
}

// orderOfFloat32 is the float32 variant of orderOfInt32.
func orderOfFloat32(data []float32) int {
	if len(data) > 1 {
		if float32AreInAscendingOrder(data) {
			return +1
		}
		if float32AreInDescendingOrder(data) {
			return -1
		}
	}
	return 0
}

// orderOfFloat64 is the float64 variant of orderOfInt32.
func orderOfFloat64(data []float64) int {
	if len(data) > 1 {
		if float64AreInAscendingOrder(data) {
			return +1
		}
		if float64AreInDescendingOrder(data) {
			return -1
		}
	}
	return 0
}

// int32AreInAscendingOrder reports whether no adjacent pair of elements is
// out of ascending order (equal neighbors are allowed).
func int32AreInAscendingOrder(data []int32) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}

// int32AreInDescendingOrder reports whether no adjacent pair of elements is
// out of descending order (equal neighbors are allowed).
func int32AreInDescendingOrder(data []int32) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}

func int64AreInAscendingOrder(data []int64) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}

func int64AreInDescendingOrder(data []int64) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}

func uint32AreInAscendingOrder(data []uint32) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}

func uint32AreInDescendingOrder(data []uint32) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}

func uint64AreInAscendingOrder(data []uint64) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}

func uint64AreInDescendingOrder(data []uint64) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}

// Note: for floats, a comparison involving NaN is false, so slices containing
// NaN pass both "no pair violates" checks here; the ascending test runs first
// and wins. TODO confirm this matches the vectorized amd64 implementation.
func float32AreInAscendingOrder(data []float32) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}

func float32AreInDescendingOrder(data []float32) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}

func float64AreInAscendingOrder(data []float64) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] > data[i] {
			return false
		}
	}
	return true
}

func float64AreInDescendingOrder(data []float64) bool {
	for i := len(data) - 1; i > 0; i-- {
		if data[i-1] < data[i] {
			return false
		}
	}
	return true
}

================================================
FILE: order_test.go
================================================
package parquet

import (
	"bytes"
	"sort"
	"testing"

	"github.com/segmentio/parquet-go/internal/quick"
)

// The *Order types below adapt raw slices to sort.Interface so the tests can
// sort inputs and verify the orderOf* results with sort.IsSorted.

type boolOrder []bool

func (v boolOrder) Len() int           { return len(v) }
func (v boolOrder) Less(i, j int) bool { return !v[i] && v[j] } // false < true
func (v boolOrder) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

type int32Order []int32

func (v int32Order) Len() int           { return len(v) }
func (v int32Order) Less(i, j int) bool { return v[i] < v[j] }
func (v int32Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

type int64Order []int64

func (v int64Order) Len() int           { return len(v) }
func (v int64Order) Less(i, j int) bool { return v[i] < v[j] }
func (v int64Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

type uint32Order []uint32

func (v uint32Order) Len() int           { return len(v) }
func (v uint32Order) Less(i, j int) bool { return v[i] < v[j] }
func (v uint32Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

type uint64Order []uint64

func (v uint64Order) Len() int { return len(v) }
func (v uint64Order) Less(i,
j int) bool { return v[i] < v[j] }
func (v uint64Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] }

type float32Order []float32

func (v float32Order) Len() int           { return len(v) }
func (v float32Order) Less(i, j int) bool { return v[i] < v[j] }
func (v float32Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

type float64Order []float64

func (v float64Order) Len() int           { return len(v) }
func (v float64Order) Less(i, j int) bool { return v[i] < v[j] }
func (v float64Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

type bytesOrder [][]byte

func (v bytesOrder) Len() int           { return len(v) }
func (v bytesOrder) Less(i, j int) bool { return bytes.Compare(v[i], v[j]) < 0 }
func (v bytesOrder) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }

// orderingName translates an orderOf* result into a human-readable label
// used in test failure messages.
func orderingName(ordering int) string {
	switch {
	case isAscending(ordering):
		return "ascending"
	case isDescending(ordering):
		return "descending"
	default:
		return "undefined"
	}
}

func isAscending(ordering int) bool  { return ordering > 0 }
func isDescending(ordering int) bool { return ordering < 0 }
func isUndefined(ordering int) bool  { return ordering == 0 }

// isOrdered reports whether the set is sorted and long enough for an order
// to be defined (the orderOf* functions return undefined below 2 elements).
func isOrdered(set sort.Interface) bool { return set.Len() > 1 && sort.IsSorted(set) }

// checkOrdering verifies that the ordering value returned by an orderOf*
// function agrees with what sort.IsSorted observes on the same data.
func checkOrdering(t *testing.T, set sort.Interface, ordering int) bool {
	t.Helper()
	switch {
	case isOrdered(set):
		if !isAscending(ordering) {
			t.Errorf("got=%s want=ascending", orderingName(ordering))
			return false
		}
	case isOrdered(sort.Reverse(set)):
		if !isDescending(ordering) {
			t.Errorf("got=%s want=descending", orderingName(ordering))
			return false
		}
	default:
		if !isUndefined(ordering) {
			t.Errorf("got=%s want=undefined", orderingName(ordering))
			return false
		}
	}
	return true
}

func TestOrderOfBool(t *testing.T) {
	check := func(values []bool) bool {
		return checkOrdering(t, boolOrder(values), orderOfBool(values))
	}
	// Each random input is checked as-is, sorted ascending, and sorted
	// descending, so all three orderings are exercised.
	err := quick.Check(func(values []bool) bool {
		if !check(values) {
			return false
		}
		sort.Sort(boolOrder(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(boolOrder(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
}

func TestOrderOfInt32(t *testing.T) {
	check := func(values []int32) bool {
		return checkOrdering(t, int32Order(values), orderOfInt32(values))
	}
	err := quick.Check(func(values []int32) bool {
		if !check(values) {
			return false
		}
		sort.Sort(int32Order(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(int32Order(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
	// This extra test validates that out-of-order values at 64 byte boundaries
	// are properly detected; it tests corner cases of the vectorized code path
	// which works on 64 bytes per loop iteration.
	values := []int32{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
		// 15 > 14, the algorithm must detect that the values are not ordered.
		14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	}
	if !check(values) {
		t.Error("failed due to not checking the connection between sequences of 16 elements")
	}
}

func TestOrderOfInt64(t *testing.T) {
	check := func(values []int64) bool {
		return checkOrdering(t, int64Order(values), orderOfInt64(values))
	}
	err := quick.Check(func(values []int64) bool {
		if !check(values) {
			return false
		}
		sort.Sort(int64Order(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(int64Order(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
	// Out-of-order values inside (8 at index 8 replaced by 6) and across
	// (14 after 15) the 8-element chunks of the vectorized code path.
	values := []int64{
		0, 1, 2, 3, 4, 5, 6, 7, 6, 9, 10, 11, 12, 13, 14, 15,
		14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	}
	if !check(values) {
		t.Error("failed due to not checking the connection between sequences of 8 elements")
	}
}

func TestOrderOfUint32(t *testing.T) {
	check := func(values []uint32) bool {
		return checkOrdering(t, uint32Order(values), orderOfUint32(values))
	}
	err := quick.Check(func(values []uint32) bool {
		if !check(values) {
			return false
		}
		sort.Sort(uint32Order(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(uint32Order(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
	values := []uint32{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
		14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	}
	if !check(values) {
		t.Error("failed due to not checking the connection between sequences of 16 elements")
	}
}

func TestOrderOfUint64(t *testing.T) {
	check := func(values []uint64) bool {
		return checkOrdering(t, uint64Order(values), orderOfUint64(values))
	}
	err := quick.Check(func(values []uint64) bool {
		if !check(values) {
			return false
		}
		sort.Sort(uint64Order(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(uint64Order(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
	values := []uint64{
		0, 1, 2, 3, 4, 5, 6, 7, 6, 9, 10, 11, 12, 13, 14, 15,
		14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	}
	if !check(values) {
		t.Error("failed due to not checking the connection between sequences of 8 elements")
	}
}

func TestOrderOfFloat32(t *testing.T) {
	check := func(values []float32) bool {
		return checkOrdering(t, float32Order(values), orderOfFloat32(values))
	}
	err := quick.Check(func(values []float32) bool {
		if !check(values) {
			return false
		}
		sort.Sort(float32Order(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(float32Order(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
	values := []float32{
		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
		14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	}
	if !check(values) {
		t.Error("failed due to not checking the connection between sequences of 16 elements")
	}
}

func TestOrderOfFloat64(t *testing.T) {
	check := func(values []float64) bool {
		return checkOrdering(t, float64Order(values), orderOfFloat64(values))
	}
	err := quick.Check(func(values []float64) bool {
		if !check(values) {
			return false
		}
		sort.Sort(float64Order(values))
		if !check(values) {
			return false
		}
		sort.Sort(sort.Reverse(float64Order(values)))
		if !check(values) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
	values := []float64{
		0, 1, 2, 3, 4, 5, 6, 7, 6, 9, 10, 11, 12, 13, 14, 15,
		14, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
	}
	if !check(values) {
		t.Error("failed due to not checking the connection between sequences of 8 elements")
	}
}

func TestOrderOfBytes(t *testing.T) {
	check := func(values [][]byte) bool {
		return checkOrdering(t, bytesOrder(values), orderOfBytes(values))
	}
	// Fixed-size [16]byte values are generated then viewed as slices so the
	// quick checker can produce them.
	err := quick.Check(func(values [][16]byte) bool {
		slices := make([][]byte, len(values))
		for i := range values {
			slices[i] = values[i][:]
		}
		if !check(slices) {
			return false
		}
		sort.Sort(bytesOrder(slices))
		if !check(slices) {
			return false
		}
		sort.Sort(sort.Reverse(bytesOrder(slices)))
		if !check(slices) {
			return false
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
}

// The benchmarks below size the value slices so each run processes the same
// number of bytes regardless of element width.

func BenchmarkOrderOfBool(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]bool, bufferSize/1)
		for i := 0; i < b.N; i++ {
			orderOfBool(values)
		}
	})
}

func BenchmarkOrderOfInt32(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]int32, bufferSize/4)
		for i := 0; i < b.N; i++ {
			orderOfInt32(values)
		}
	})
}

func BenchmarkOrderOfInt64(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]int64, bufferSize/8)
		for i := 0; i < b.N; i++ {
			orderOfInt64(values)
		}
	})
}

func BenchmarkOrderOfUint32(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]uint32, bufferSize/4)
		for i := 0; i < b.N; i++ {
			orderOfUint32(values)
		}
	})
}

func BenchmarkOrderOfUint64(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]uint64, bufferSize/8)
		for i := 0; i < b.N; i++ {
			orderOfUint64(values)
		}
	})
}

func BenchmarkOrderOfFloat32(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]float32, bufferSize/4)
		for i := 0; i < b.N; i++ {
			orderOfFloat32(values)
		}
	})
}

func BenchmarkOrderOfFloat64(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		values := make([]float64, bufferSize/8)
		for i := 0; i < b.N; i++ {
			orderOfFloat64(values)
		}
	})
}

func BenchmarkOrderOfBytes(b *testing.B) {
	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
		data := make([]byte, bufferSize)
		values := make([][]byte, len(data)/16)
		for i := range values {
			values[i] = data[i*16 : (i+1)*16]
		}
		for i := 0; i < b.N; i++ {
			orderOfBytes(values)
		}
	})
}

================================================
FILE: page.go
================================================
package parquet

import (
	"bytes"
	"fmt"
	"io"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/internal/bitpack"
	"github.com/segmentio/parquet-go/internal/debug"
)

// Page values represent sequences of parquet values. From the Parquet
// documentation: "Column chunks are a chunk of the data for a particular
// column. They live in a particular row group and are guaranteed to be
// contiguous in the file. Column chunks are divided up into pages. A page is
// conceptually an indivisible unit (in terms of compression and encoding).
// There can be multiple page types which are interleaved in a column chunk."
//
// https://github.com/apache/parquet-format#glossary
type Page interface {
	// Returns the type of values read from this page.
	//
	// The returned type can be used to encode the page data, in the case of
	// an indexed page (which has a dictionary), the type is configured to
	// encode the indexes stored in the page rather than the plain values.
	Type() Type

	// Returns the column index that this page belongs to.
Column() int // If the page contains indexed values, calling this method returns the // dictionary in which the values are looked up. Otherwise, the method // returns nil. Dictionary() Dictionary // Returns the number of rows, values, and nulls in the page. The number of // rows may be less than the number of values in the page if the page is // part of a repeated column. NumRows() int64 NumValues() int64 NumNulls() int64 // Returns the page's min and max values. // // The third value is a boolean indicating whether the page bounds were // available. Page bounds may not be known if the page contained no values // or only nulls, or if they were read from a parquet file which had neither // page statistics nor a page index. Bounds() (min, max Value, ok bool) // Returns the size of the page in bytes (uncompressed). Size() int64 // Returns a reader exposing the values contained in the page. // // Depending on the underlying implementation, the returned reader may // support reading an array of typed Go values by implementing interfaces // like parquet.Int32Reader. Applications should use type assertions on // the returned reader to determine whether those optimizations are // available. Values() ValueReader // Returns a new page which is as slice of the receiver between row indexes // i and j. Slice(i, j int64) Page // Expose the lists of repetition and definition levels of the page. // // The returned slices may be empty when the page has no repetition or // definition levels. RepetitionLevels() []byte DefinitionLevels() []byte // Returns the in-memory buffer holding the page values. // // The intent is for the returned value to be used as input parameter when // calling the Encode method of the associated Type. // // The slices referenced by the encoding.Values may be the same across // multiple calls to this method, applications must treat the content as // immutable. 
Data() encoding.Values } // PageReader is an interface implemented by types that support producing a // sequence of pages. type PageReader interface { // Reads and returns the next page from the sequence. When all pages have // been read, or if the sequence was closed, the method returns io.EOF. ReadPage() (Page, error) } // PageWriter is an interface implemented by types that support writing pages // to an underlying storage medium. type PageWriter interface { WritePage(Page) (int64, error) } // Pages is an interface implemented by page readers returned by calling the // Pages method of ColumnChunk instances. type Pages interface { PageReader RowSeeker io.Closer } // AsyncPages wraps the given Pages instance to perform page reads // asynchronously in a separate goroutine. // // Performing page reads asynchronously is important when the application may // be reading pages from a high latency backend, and the last // page read may be processed while initiating reading of the next page. func AsyncPages(pages Pages) Pages { p := new(asyncPages) p.init(pages, nil) // If the pages object gets garbage collected without Close being called, // this finalizer would ensure that the goroutine is stopped and doesn't // leak. debug.SetFinalizer(p, func(p *asyncPages) { p.Close() }) return p } type asyncPages struct { read <-chan asyncPage seek chan<- int64 done chan<- struct{} version int64 } type asyncPage struct { page Page err error version int64 } func (pages *asyncPages) init(base Pages, done chan struct{}) { read := make(chan asyncPage) seek := make(chan int64, 1) pages.read = read pages.seek = seek if done == nil { done = make(chan struct{}) pages.done = done } go readPages(base, read, seek, done) } func (pages *asyncPages) Close() (err error) { if pages.done != nil { close(pages.done) pages.done = nil } for p := range pages.read { // Capture the last error, which is the value returned from closing the // underlying Pages instance. 
err = p.err } pages.seek = nil return err } func (pages *asyncPages) ReadPage() (Page, error) { for { p, ok := <-pages.read if !ok { return nil, io.EOF } // Because calls to SeekToRow might be made concurrently to reading // pages, it is possible for ReadPage to see pages that were read before // the last SeekToRow call. // // A version number is attached to each page read asynchronously to // discard outdated pages and ensure that we maintain a consistent view // of the sequence of pages read. if p.version == pages.version { return p.page, p.err } } } func (pages *asyncPages) SeekToRow(rowIndex int64) error { if pages.seek == nil { return io.ErrClosedPipe } // The seek channel has a capacity of 1 to allow the first SeekToRow call to // be non-blocking. // // If SeekToRow calls are performed faster than they can be handled by the // goroutine reading pages, this path might become a contention point. pages.seek <- rowIndex pages.version++ return nil } func readPages(pages Pages, read chan<- asyncPage, seek <-chan int64, done <-chan struct{}) { defer func() { read <- asyncPage{err: pages.Close(), version: -1} close(read) }() version := int64(0) for { page, err := pages.ReadPage() for { select { case <-done: return case read <- asyncPage{ page: page, err: err, version: version, }: case rowIndex := <-seek: version++ err = pages.SeekToRow(rowIndex) } if err == nil { break } } } } type singlePage struct { page Page seek int64 numRows int64 } func (r *singlePage) ReadPage() (Page, error) { if r.page != nil { if r.seek < r.numRows { seek := r.seek r.seek = r.numRows if seek > 0 { return r.page.Slice(seek, r.numRows), nil } return r.page, nil } } return nil, io.EOF } func (r *singlePage) SeekToRow(rowIndex int64) error { r.seek = rowIndex return nil } func (r *singlePage) Close() error { r.page = nil r.seek = 0 return nil } func onePage(page Page) Pages { return &singlePage{page: page, numRows: page.NumRows()} } // CopyPages copies pages from src to dst, returning the 
number of values that // were copied. // // The function returns any error it encounters reading or writing pages, except // for io.EOF from the reader which indicates that there were no more pages to // read. func CopyPages(dst PageWriter, src PageReader) (numValues int64, err error) { for { p, err := src.ReadPage() if err != nil { if err == io.EOF { err = nil } return numValues, err } n, err := dst.WritePage(p) numValues += n if err != nil { return numValues, err } } } // errorPage is an implementation of the Page interface which always errors when // attempting to read its values. // // The error page declares that it contains one value (even if it does not) // as a way to ensure that it is not ignored due to being empty when written // to a file. type errorPage struct { typ Type err error columnIndex int } func newErrorPage(typ Type, columnIndex int, msg string, args ...interface{}) *errorPage { return &errorPage{ typ: typ, err: fmt.Errorf(msg, args...), columnIndex: columnIndex, } } func (page *errorPage) Type() Type { return page.typ } func (page *errorPage) Column() int { return page.columnIndex } func (page *errorPage) Dictionary() Dictionary { return nil } func (page *errorPage) NumRows() int64 { return 1 } func (page *errorPage) NumValues() int64 { return 1 } func (page *errorPage) NumNulls() int64 { return 0 } func (page *errorPage) Bounds() (min, max Value, ok bool) { return } func (page *errorPage) Slice(i, j int64) Page { return page } func (page *errorPage) Size() int64 { return 1 } func (page *errorPage) RepetitionLevels() []byte { return nil } func (page *errorPage) DefinitionLevels() []byte { return nil } func (page *errorPage) Data() encoding.Values { return encoding.Values{} } func (page *errorPage) Values() ValueReader { return errorPageValues{page: page} } type errorPageValues struct{ page *errorPage } func (r errorPageValues) ReadValues([]Value) (int, error) { return 0, r.page.err } func (r errorPageValues) Close() error { return nil } func 
errPageBoundsOutOfRange(i, j, n int64) error { return fmt.Errorf("page bounds out of range [%d:%d]: with length %d", i, j, n) } type optionalPage struct { base Page maxDefinitionLevel byte definitionLevels []byte } func newOptionalPage(base Page, maxDefinitionLevel byte, definitionLevels []byte) *optionalPage { return &optionalPage{ base: base, maxDefinitionLevel: maxDefinitionLevel, definitionLevels: definitionLevels, } } func (page *optionalPage) Type() Type { return page.base.Type() } func (page *optionalPage) Column() int { return page.base.Column() } func (page *optionalPage) Dictionary() Dictionary { return page.base.Dictionary() } func (page *optionalPage) NumRows() int64 { return int64(len(page.definitionLevels)) } func (page *optionalPage) NumValues() int64 { return int64(len(page.definitionLevels)) } func (page *optionalPage) NumNulls() int64 { return int64(countLevelsNotEqual(page.definitionLevels, page.maxDefinitionLevel)) } func (page *optionalPage) Bounds() (min, max Value, ok bool) { return page.base.Bounds() } func (page *optionalPage) Size() int64 { return int64(len(page.definitionLevels)) + page.base.Size() } func (page *optionalPage) RepetitionLevels() []byte { return nil } func (page *optionalPage) DefinitionLevels() []byte { return page.definitionLevels } func (page *optionalPage) Data() encoding.Values { return page.base.Data() } func (page *optionalPage) Values() ValueReader { return &optionalPageValues{ page: page, values: page.base.Values(), } } func (page *optionalPage) Slice(i, j int64) Page { maxDefinitionLevel := page.maxDefinitionLevel definitionLevels := page.definitionLevels numNulls1 := int64(countLevelsNotEqual(definitionLevels[:i], maxDefinitionLevel)) numNulls2 := int64(countLevelsNotEqual(definitionLevels[i:j], maxDefinitionLevel)) return newOptionalPage( page.base.Slice(i-numNulls1, j-(numNulls1+numNulls2)), maxDefinitionLevel, definitionLevels[i:j:j], ) } type repeatedPage struct { base Page maxRepetitionLevel byte 
maxDefinitionLevel byte definitionLevels []byte repetitionLevels []byte } func newRepeatedPage(base Page, maxRepetitionLevel, maxDefinitionLevel byte, repetitionLevels, definitionLevels []byte) *repeatedPage { return &repeatedPage{ base: base, maxRepetitionLevel: maxRepetitionLevel, maxDefinitionLevel: maxDefinitionLevel, definitionLevels: definitionLevels, repetitionLevels: repetitionLevels, } } func (page *repeatedPage) Type() Type { return page.base.Type() } func (page *repeatedPage) Column() int { return page.base.Column() } func (page *repeatedPage) Dictionary() Dictionary { return page.base.Dictionary() } func (page *repeatedPage) NumRows() int64 { return int64(countLevelsEqual(page.repetitionLevels, 0)) } func (page *repeatedPage) NumValues() int64 { return int64(len(page.definitionLevels)) } func (page *repeatedPage) NumNulls() int64 { return int64(countLevelsNotEqual(page.definitionLevels, page.maxDefinitionLevel)) } func (page *repeatedPage) Bounds() (min, max Value, ok bool) { return page.base.Bounds() } func (page *repeatedPage) Size() int64 { return int64(len(page.repetitionLevels)) + int64(len(page.definitionLevels)) + page.base.Size() } func (page *repeatedPage) RepetitionLevels() []byte { return page.repetitionLevels } func (page *repeatedPage) DefinitionLevels() []byte { return page.definitionLevels } func (page *repeatedPage) Data() encoding.Values { return page.base.Data() } func (page *repeatedPage) Values() ValueReader { return &repeatedPageValues{ page: page, values: page.base.Values(), } } func (page *repeatedPage) Slice(i, j int64) Page { numRows := page.NumRows() if i < 0 || i > numRows { panic(errPageBoundsOutOfRange(i, j, numRows)) } if j < 0 || j > numRows { panic(errPageBoundsOutOfRange(i, j, numRows)) } if i > j { panic(errPageBoundsOutOfRange(i, j, numRows)) } maxRepetitionLevel := page.maxRepetitionLevel maxDefinitionLevel := page.maxDefinitionLevel repetitionLevels := page.repetitionLevels definitionLevels := page.definitionLevels 
rowIndex0 := 0 rowIndex1 := len(repetitionLevels) rowIndex2 := len(repetitionLevels) for k, def := range repetitionLevels { if def == 0 { if rowIndex0 == int(i) { rowIndex1 = k break } rowIndex0++ } } for k, def := range repetitionLevels[rowIndex1:] { if def == 0 { if rowIndex0 == int(j) { rowIndex2 = rowIndex1 + k break } rowIndex0++ } } numNulls1 := countLevelsNotEqual(definitionLevels[:rowIndex1], maxDefinitionLevel) numNulls2 := countLevelsNotEqual(definitionLevels[rowIndex1:rowIndex2], maxDefinitionLevel) i = int64(rowIndex1 - numNulls1) j = int64(rowIndex2 - (numNulls1 + numNulls2)) return newRepeatedPage( page.base.Slice(i, j), maxRepetitionLevel, maxDefinitionLevel, repetitionLevels[rowIndex1:rowIndex2:rowIndex2], definitionLevels[rowIndex1:rowIndex2:rowIndex2], ) } type booleanPage struct { typ Type bits []byte offset int32 numValues int32 columnIndex int16 } func newBooleanPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *booleanPage { return &booleanPage{ typ: typ, bits: values.Boolean()[:bitpack.ByteCount(uint(numValues))], numValues: numValues, columnIndex: ^columnIndex, } } func (page *booleanPage) Type() Type { return page.typ } func (page *booleanPage) Column() int { return int(^page.columnIndex) } func (page *booleanPage) Dictionary() Dictionary { return nil } func (page *booleanPage) NumRows() int64 { return int64(page.numValues) } func (page *booleanPage) NumValues() int64 { return int64(page.numValues) } func (page *booleanPage) NumNulls() int64 { return 0 } func (page *booleanPage) Size() int64 { return int64(len(page.bits)) } func (page *booleanPage) RepetitionLevels() []byte { return nil } func (page *booleanPage) DefinitionLevels() []byte { return nil } func (page *booleanPage) Data() encoding.Values { return encoding.BooleanValues(page.bits) } func (page *booleanPage) Values() ValueReader { return &booleanPageValues{page: page} } func (page *booleanPage) valueAt(i int) bool { j := uint32(int(page.offset)+i) / 8 k 
:= uint32(int(page.offset)+i) % 8 return ((page.bits[j] >> k) & 1) != 0 }

// min returns false if any value of the page is false; a non-empty page with
// only true values has minimum true, and an empty page has minimum false.
func (page *booleanPage) min() bool {
	for i := 0; i < int(page.numValues); i++ {
		if !page.valueAt(i) {
			return false
		}
	}
	return page.numValues > 0
}

// max returns true if any value of the page is true.
func (page *booleanPage) max() bool {
	for i := 0; i < int(page.numValues); i++ {
		if page.valueAt(i) {
			return true
		}
	}
	return false
}

// bounds scans for both a true and a false value, stopping early as soon as
// both have been observed.
func (page *booleanPage) bounds() (min, max bool) {
	hasFalse, hasTrue := false, false

	for i := 0; i < int(page.numValues); i++ {
		v := page.valueAt(i)
		if v {
			hasTrue = true
		} else {
			hasFalse = true
		}
		if hasTrue && hasFalse {
			break
		}
	}

	min = !hasFalse
	max = hasTrue
	return min, max
}

func (page *booleanPage) Bounds() (min, max Value, ok bool) {
	if ok = page.numValues > 0; ok {
		minBool, maxBool := page.bounds()
		min = page.makeValue(minBool)
		max = page.makeValue(maxBool)
	}
	return min, max, ok
}

// Slice returns the page of values in the range [i:j).
//
// The receiver's bit offset is taken into account so that slicing a page that
// was itself produced by Slice (and therefore may not start on a byte
// boundary) selects the correct bits; the previous implementation ignored
// page.offset and returned wrong values in that case. When page.offset is
// zero the behavior is unchanged.
func (page *booleanPage) Slice(i, j int64) Page {
	lo := int64(page.offset) + i
	hi := int64(page.offset) + j
	off := lo / 8
	end := hi / 8
	if (hi % 8) != 0 {
		end++
	}
	return &booleanPage{
		typ:         page.typ,
		bits:        page.bits[off:end],
		offset:      int32(lo % 8),
		numValues:   int32(j - i),
		columnIndex: page.columnIndex,
	}
}

func (page *booleanPage) makeValue(v bool) Value {
	value := makeValueBoolean(v)
	value.columnIndex = page.columnIndex
	return value
}

type int32Page struct {
	typ         Type
	values      []int32
	columnIndex int16 // stored as the one's complement of the column index
}

func newInt32Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int32Page {
	return &int32Page{
		typ:         typ,
		values:      values.Int32()[:numValues],
		columnIndex: ^columnIndex,
	}
}

func (page *int32Page) Type() Type { return page.typ }

func (page *int32Page) Column() int { return int(^page.columnIndex) }

func (page *int32Page) Dictionary() Dictionary { return nil }

func (page *int32Page) NumRows() int64 { return int64(len(page.values)) }

func (page *int32Page) NumValues() int64 { return int64(len(page.values)) }

func (page *int32Page) NumNulls() int64 { return 0 }

func (page *int32Page) Size() int64 { return 4 * int64(len(page.values)) }

func (page *int32Page) RepetitionLevels()
[]byte { return nil } func (page *int32Page) DefinitionLevels() []byte { return nil } func (page *int32Page) Data() encoding.Values { return encoding.Int32Values(page.values) } func (page *int32Page) Values() ValueReader { return &int32PageValues{page: page} } func (page *int32Page) min() int32 { return minInt32(page.values) } func (page *int32Page) max() int32 { return maxInt32(page.values) } func (page *int32Page) bounds() (min, max int32) { return boundsInt32(page.values) } func (page *int32Page) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minInt32, maxInt32 := page.bounds() min = page.makeValue(minInt32) max = page.makeValue(maxInt32) } return min, max, ok } func (page *int32Page) Slice(i, j int64) Page { return &int32Page{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *int32Page) makeValue(v int32) Value { value := makeValueInt32(v) value.columnIndex = page.columnIndex return value } type int64Page struct { typ Type values []int64 columnIndex int16 } func newInt64Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int64Page { return &int64Page{ typ: typ, values: values.Int64()[:numValues], columnIndex: ^columnIndex, } } func (page *int64Page) Type() Type { return page.typ } func (page *int64Page) Column() int { return int(^page.columnIndex) } func (page *int64Page) Dictionary() Dictionary { return nil } func (page *int64Page) NumRows() int64 { return int64(len(page.values)) } func (page *int64Page) NumValues() int64 { return int64(len(page.values)) } func (page *int64Page) NumNulls() int64 { return 0 } func (page *int64Page) Size() int64 { return 8 * int64(len(page.values)) } func (page *int64Page) RepetitionLevels() []byte { return nil } func (page *int64Page) DefinitionLevels() []byte { return nil } func (page *int64Page) Data() encoding.Values { return encoding.Int64Values(page.values) } func (page *int64Page) Values() ValueReader { return &int64PageValues{page: 
page} } func (page *int64Page) min() int64 { return minInt64(page.values) } func (page *int64Page) max() int64 { return maxInt64(page.values) } func (page *int64Page) bounds() (min, max int64) { return boundsInt64(page.values) } func (page *int64Page) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minInt64, maxInt64 := page.bounds() min = page.makeValue(minInt64) max = page.makeValue(maxInt64) } return min, max, ok } func (page *int64Page) Slice(i, j int64) Page { return &int64Page{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *int64Page) makeValue(v int64) Value { value := makeValueInt64(v) value.columnIndex = page.columnIndex return value } type int96Page struct { typ Type values []deprecated.Int96 columnIndex int16 } func newInt96Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *int96Page { return &int96Page{ typ: typ, values: values.Int96()[:numValues], columnIndex: ^columnIndex, } } func (page *int96Page) Type() Type { return page.typ } func (page *int96Page) Column() int { return int(^page.columnIndex) } func (page *int96Page) Dictionary() Dictionary { return nil } func (page *int96Page) NumRows() int64 { return int64(len(page.values)) } func (page *int96Page) NumValues() int64 { return int64(len(page.values)) } func (page *int96Page) NumNulls() int64 { return 0 } func (page *int96Page) Size() int64 { return 12 * int64(len(page.values)) } func (page *int96Page) RepetitionLevels() []byte { return nil } func (page *int96Page) DefinitionLevels() []byte { return nil } func (page *int96Page) Data() encoding.Values { return encoding.Int96Values(page.values) } func (page *int96Page) Values() ValueReader { return &int96PageValues{page: page} } func (page *int96Page) min() deprecated.Int96 { return deprecated.MinInt96(page.values) } func (page *int96Page) max() deprecated.Int96 { return deprecated.MaxInt96(page.values) } func (page *int96Page) bounds() (min, max 
deprecated.Int96) { return deprecated.MinMaxInt96(page.values) } func (page *int96Page) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minInt96, maxInt96 := page.bounds() min = page.makeValue(minInt96) max = page.makeValue(maxInt96) } return min, max, ok } func (page *int96Page) Slice(i, j int64) Page { return &int96Page{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *int96Page) makeValue(v deprecated.Int96) Value { value := makeValueInt96(v) value.columnIndex = page.columnIndex return value } type floatPage struct { typ Type values []float32 columnIndex int16 } func newFloatPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *floatPage { return &floatPage{ typ: typ, values: values.Float()[:numValues], columnIndex: ^columnIndex, } } func (page *floatPage) Type() Type { return page.typ } func (page *floatPage) Column() int { return int(^page.columnIndex) } func (page *floatPage) Dictionary() Dictionary { return nil } func (page *floatPage) NumRows() int64 { return int64(len(page.values)) } func (page *floatPage) NumValues() int64 { return int64(len(page.values)) } func (page *floatPage) NumNulls() int64 { return 0 } func (page *floatPage) Size() int64 { return 4 * int64(len(page.values)) } func (page *floatPage) RepetitionLevels() []byte { return nil } func (page *floatPage) DefinitionLevels() []byte { return nil } func (page *floatPage) Data() encoding.Values { return encoding.FloatValues(page.values) } func (page *floatPage) Values() ValueReader { return &floatPageValues{page: page} } func (page *floatPage) min() float32 { return minFloat32(page.values) } func (page *floatPage) max() float32 { return maxFloat32(page.values) } func (page *floatPage) bounds() (min, max float32) { return boundsFloat32(page.values) } func (page *floatPage) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minFloat32, maxFloat32 := page.bounds() min = 
page.makeValue(minFloat32) max = page.makeValue(maxFloat32) } return min, max, ok } func (page *floatPage) Slice(i, j int64) Page { return &floatPage{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *floatPage) makeValue(v float32) Value { value := makeValueFloat(v) value.columnIndex = page.columnIndex return value } type doublePage struct { typ Type values []float64 columnIndex int16 } func newDoublePage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *doublePage { return &doublePage{ typ: typ, values: values.Double()[:numValues], columnIndex: ^columnIndex, } } func (page *doublePage) Type() Type { return page.typ } func (page *doublePage) Column() int { return int(^page.columnIndex) } func (page *doublePage) Dictionary() Dictionary { return nil } func (page *doublePage) NumRows() int64 { return int64(len(page.values)) } func (page *doublePage) NumValues() int64 { return int64(len(page.values)) } func (page *doublePage) NumNulls() int64 { return 0 } func (page *doublePage) Size() int64 { return 8 * int64(len(page.values)) } func (page *doublePage) RepetitionLevels() []byte { return nil } func (page *doublePage) DefinitionLevels() []byte { return nil } func (page *doublePage) Data() encoding.Values { return encoding.DoubleValues(page.values) } func (page *doublePage) Values() ValueReader { return &doublePageValues{page: page} } func (page *doublePage) min() float64 { return minFloat64(page.values) } func (page *doublePage) max() float64 { return maxFloat64(page.values) } func (page *doublePage) bounds() (min, max float64) { return boundsFloat64(page.values) } func (page *doublePage) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minFloat64, maxFloat64 := page.bounds() min = page.makeValue(minFloat64) max = page.makeValue(maxFloat64) } return min, max, ok } func (page *doublePage) Slice(i, j int64) Page { return &doublePage{ typ: page.typ, values: page.values[i:j], columnIndex: 
page.columnIndex, } } func (page *doublePage) makeValue(v float64) Value { value := makeValueDouble(v) value.columnIndex = page.columnIndex return value } type byteArrayPage struct { typ Type values []byte offsets []uint32 columnIndex int16 } func newByteArrayPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *byteArrayPage { data, offsets := values.ByteArray() return &byteArrayPage{ typ: typ, values: data, offsets: offsets[:numValues+1], columnIndex: ^columnIndex, } } func (page *byteArrayPage) Type() Type { return page.typ } func (page *byteArrayPage) Column() int { return int(^page.columnIndex) } func (page *byteArrayPage) Dictionary() Dictionary { return nil } func (page *byteArrayPage) NumRows() int64 { return int64(page.len()) } func (page *byteArrayPage) NumValues() int64 { return int64(page.len()) } func (page *byteArrayPage) NumNulls() int64 { return 0 } func (page *byteArrayPage) Size() int64 { return int64(len(page.values)) + 4*int64(len(page.offsets)) } func (page *byteArrayPage) RepetitionLevels() []byte { return nil } func (page *byteArrayPage) DefinitionLevels() []byte { return nil } func (page *byteArrayPage) Data() encoding.Values { return encoding.ByteArrayValues(page.values, page.offsets) } func (page *byteArrayPage) Values() ValueReader { return &byteArrayPageValues{page: page} } func (page *byteArrayPage) len() int { return len(page.offsets) - 1 } func (page *byteArrayPage) index(i int) []byte { j := page.offsets[i+0] k := page.offsets[i+1] return page.values[j:k:k] } func (page *byteArrayPage) min() (min []byte) { if n := page.len(); n > 0 { min = page.index(0) for i := 1; i < n; i++ { v := page.index(i) if bytes.Compare(v, min) < 0 { min = v } } } return min } func (page *byteArrayPage) max() (max []byte) { if n := page.len(); n > 0 { max = page.index(0) for i := 1; i < n; i++ { v := page.index(i) if bytes.Compare(v, max) > 0 { max = v } } } return max } func (page *byteArrayPage) bounds() (min, max []byte) { if n := 
page.len(); n > 0 { min = page.index(0) max = min for i := 1; i < n; i++ { v := page.index(i) switch { case bytes.Compare(v, min) < 0: min = v case bytes.Compare(v, max) > 0: max = v } } } return min, max } func (page *byteArrayPage) Bounds() (min, max Value, ok bool) { if ok = len(page.offsets) > 1; ok { minBytes, maxBytes := page.bounds() min = page.makeValueBytes(minBytes) max = page.makeValueBytes(maxBytes) } return min, max, ok } func (page *byteArrayPage) cloneValues() []byte { values := make([]byte, len(page.values)) copy(values, page.values) return values } func (page *byteArrayPage) cloneOffsets() []uint32 { offsets := make([]uint32, len(page.offsets)) copy(offsets, page.offsets) return offsets } func (page *byteArrayPage) Slice(i, j int64) Page { return &byteArrayPage{ typ: page.typ, values: page.values, offsets: page.offsets[i : j+1], columnIndex: page.columnIndex, } } func (page *byteArrayPage) makeValueBytes(v []byte) Value { value := makeValueBytes(ByteArray, v) value.columnIndex = page.columnIndex return value } func (page *byteArrayPage) makeValueString(v string) Value { value := makeValueString(ByteArray, v) value.columnIndex = page.columnIndex return value } type fixedLenByteArrayPage struct { typ Type data []byte size int columnIndex int16 } func newFixedLenByteArrayPage(typ Type, columnIndex int16, numValues int32, values encoding.Values) *fixedLenByteArrayPage { data, size := values.FixedLenByteArray() return &fixedLenByteArrayPage{ typ: typ, data: data[:int(numValues)*size], size: size, columnIndex: ^columnIndex, } } func (page *fixedLenByteArrayPage) Type() Type { return page.typ } func (page *fixedLenByteArrayPage) Column() int { return int(^page.columnIndex) } func (page *fixedLenByteArrayPage) Dictionary() Dictionary { return nil } func (page *fixedLenByteArrayPage) NumRows() int64 { return int64(len(page.data) / page.size) } func (page *fixedLenByteArrayPage) NumValues() int64 { return int64(len(page.data) / page.size) } func (page 
*fixedLenByteArrayPage) NumNulls() int64 { return 0 } func (page *fixedLenByteArrayPage) Size() int64 { return int64(len(page.data)) } func (page *fixedLenByteArrayPage) RepetitionLevels() []byte { return nil } func (page *fixedLenByteArrayPage) DefinitionLevels() []byte { return nil } func (page *fixedLenByteArrayPage) Data() encoding.Values { return encoding.FixedLenByteArrayValues(page.data, page.size) } func (page *fixedLenByteArrayPage) Values() ValueReader { return &fixedLenByteArrayPageValues{page: page} } func (page *fixedLenByteArrayPage) min() []byte { return minFixedLenByteArray(page.data, page.size) } func (page *fixedLenByteArrayPage) max() []byte { return maxFixedLenByteArray(page.data, page.size) } func (page *fixedLenByteArrayPage) bounds() (min, max []byte) { return boundsFixedLenByteArray(page.data, page.size) } func (page *fixedLenByteArrayPage) Bounds() (min, max Value, ok bool) { if ok = len(page.data) > 0; ok { minBytes, maxBytes := page.bounds() min = page.makeValueBytes(minBytes) max = page.makeValueBytes(maxBytes) } return min, max, ok } func (page *fixedLenByteArrayPage) Slice(i, j int64) Page { return &fixedLenByteArrayPage{ typ: page.typ, data: page.data[i*int64(page.size) : j*int64(page.size)], size: page.size, columnIndex: page.columnIndex, } } func (page *fixedLenByteArrayPage) makeValueBytes(v []byte) Value { value := makeValueBytes(FixedLenByteArray, v) value.columnIndex = page.columnIndex return value } func (page *fixedLenByteArrayPage) makeValueString(v string) Value { value := makeValueString(FixedLenByteArray, v) value.columnIndex = page.columnIndex return value } type uint32Page struct { typ Type values []uint32 columnIndex int16 } func newUint32Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *uint32Page { return &uint32Page{ typ: typ, values: values.Uint32()[:numValues], columnIndex: ^columnIndex, } } func (page *uint32Page) Type() Type { return page.typ } func (page *uint32Page) Column() int { 
return int(^page.columnIndex) } func (page *uint32Page) Dictionary() Dictionary { return nil } func (page *uint32Page) NumRows() int64 { return int64(len(page.values)) } func (page *uint32Page) NumValues() int64 { return int64(len(page.values)) } func (page *uint32Page) NumNulls() int64 { return 0 } func (page *uint32Page) Size() int64 { return 4 * int64(len(page.values)) } func (page *uint32Page) RepetitionLevels() []byte { return nil } func (page *uint32Page) DefinitionLevels() []byte { return nil } func (page *uint32Page) Data() encoding.Values { return encoding.Uint32Values(page.values) } func (page *uint32Page) Values() ValueReader { return &uint32PageValues{page: page} } func (page *uint32Page) min() uint32 { return minUint32(page.values) } func (page *uint32Page) max() uint32 { return maxUint32(page.values) } func (page *uint32Page) bounds() (min, max uint32) { return boundsUint32(page.values) } func (page *uint32Page) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minUint32, maxUint32 := page.bounds() min = page.makeValue(minUint32) max = page.makeValue(maxUint32) } return min, max, ok } func (page *uint32Page) Slice(i, j int64) Page { return &uint32Page{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *uint32Page) makeValue(v uint32) Value { value := makeValueUint32(v) value.columnIndex = page.columnIndex return value } type uint64Page struct { typ Type values []uint64 columnIndex int16 } func newUint64Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *uint64Page { return &uint64Page{ typ: typ, values: values.Uint64()[:numValues], columnIndex: ^columnIndex, } } func (page *uint64Page) Type() Type { return page.typ } func (page *uint64Page) Column() int { return int(^page.columnIndex) } func (page *uint64Page) Dictionary() Dictionary { return nil } func (page *uint64Page) NumRows() int64 { return int64(len(page.values)) } func (page *uint64Page) NumValues() int64 { 
return int64(len(page.values)) } func (page *uint64Page) NumNulls() int64 { return 0 } func (page *uint64Page) Size() int64 { return 8 * int64(len(page.values)) } func (page *uint64Page) RepetitionLevels() []byte { return nil } func (page *uint64Page) DefinitionLevels() []byte { return nil } func (page *uint64Page) Data() encoding.Values { return encoding.Uint64Values(page.values) } func (page *uint64Page) Values() ValueReader { return &uint64PageValues{page: page} } func (page *uint64Page) min() uint64 { return minUint64(page.values) } func (page *uint64Page) max() uint64 { return maxUint64(page.values) } func (page *uint64Page) bounds() (min, max uint64) { return boundsUint64(page.values) } func (page *uint64Page) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minUint64, maxUint64 := page.bounds() min = page.makeValue(minUint64) max = page.makeValue(maxUint64) } return min, max, ok } func (page *uint64Page) Slice(i, j int64) Page { return &uint64Page{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *uint64Page) makeValue(v uint64) Value { value := makeValueUint64(v) value.columnIndex = page.columnIndex return value } type be128Page struct { typ Type values [][16]byte columnIndex int16 } func newBE128Page(typ Type, columnIndex int16, numValues int32, values encoding.Values) *be128Page { return &be128Page{ typ: typ, values: values.Uint128()[:numValues], columnIndex: ^columnIndex, } } func (page *be128Page) Type() Type { return page.typ } func (page *be128Page) Column() int { return int(^page.columnIndex) } func (page *be128Page) Dictionary() Dictionary { return nil } func (page *be128Page) NumRows() int64 { return int64(len(page.values)) } func (page *be128Page) NumValues() int64 { return int64(len(page.values)) } func (page *be128Page) NumNulls() int64 { return 0 } func (page *be128Page) Size() int64 { return 16 * int64(len(page.values)) } func (page *be128Page) RepetitionLevels() []byte { return nil 
} func (page *be128Page) DefinitionLevels() []byte { return nil } func (page *be128Page) Data() encoding.Values { return encoding.Uint128Values(page.values) } func (page *be128Page) Values() ValueReader { return &be128PageValues{page: page} } func (page *be128Page) min() []byte { return minBE128(page.values) } func (page *be128Page) max() []byte { return maxBE128(page.values) } func (page *be128Page) bounds() (min, max []byte) { return boundsBE128(page.values) } func (page *be128Page) Bounds() (min, max Value, ok bool) { if ok = len(page.values) > 0; ok { minBytes, maxBytes := page.bounds() min = page.makeValueBytes(minBytes) max = page.makeValueBytes(maxBytes) } return min, max, ok } func (page *be128Page) Slice(i, j int64) Page { return &be128Page{ typ: page.typ, values: page.values[i:j], columnIndex: page.columnIndex, } } func (page *be128Page) makeValue(v *[16]byte) Value { return page.makeValueBytes(v[:]) } func (page *be128Page) makeValueBytes(v []byte) Value { value := makeValueBytes(FixedLenByteArray, v) value.columnIndex = page.columnIndex return value } func (page *be128Page) makeValueString(v string) Value { value := makeValueString(FixedLenByteArray, v) value.columnIndex = page.columnIndex return value } type nullPage struct { typ Type column int count int } func newNullPage(typ Type, columnIndex int16, numValues int32) *nullPage { return &nullPage{ typ: typ, column: int(columnIndex), count: int(numValues), } } func (page *nullPage) Type() Type { return page.typ } func (page *nullPage) Column() int { return page.column } func (page *nullPage) Dictionary() Dictionary { return nil } func (page *nullPage) NumRows() int64 { return int64(page.count) } func (page *nullPage) NumValues() int64 { return int64(page.count) } func (page *nullPage) NumNulls() int64 { return int64(page.count) } func (page *nullPage) Bounds() (min, max Value, ok bool) { return } func (page *nullPage) Size() int64 { return 1 } func (page *nullPage) Values() ValueReader { return 
&nullPageValues{column: page.column, remain: page.count} } func (page *nullPage) Slice(i, j int64) Page { return &nullPage{column: page.column, count: page.count - int(j-i)} } func (page *nullPage) RepetitionLevels() []byte { return nil } func (page *nullPage) DefinitionLevels() []byte { return nil } func (page *nullPage) Data() encoding.Values { return encoding.Values{} } ================================================ FILE: page_bounds.go ================================================ package parquet import "bytes" func boundsFixedLenByteArray(data []byte, size int) (min, max []byte) { if len(data) > 0 { min = data[:size] max = data[:size] for i, j := size, 2*size; j <= len(data); { item := data[i:j] if bytes.Compare(item, min) < 0 { min = item } if bytes.Compare(item, max) > 0 { max = item } i += size j += size } } return min, max } ================================================ FILE: page_bounds_amd64.go ================================================ //go:build !purego package parquet // The min-max algorithms combine looking for the min and max values in a single // pass over the data. While the behavior is the same as calling functions to // look for the min and max values independently, doing both operations at the // same time means that we only load the data from memory once. When working on // large arrays the algorithms are limited by memory bandwidth, computing both // the min and max together shrinks by half the amount of data read from memory. 
// // The following benchmarks results were highlighting the benefits of combining // the min-max search, compared to calling the min and max functions separately: // // name old time/op new time/op delta // BoundsInt64/10240KiB 590µs ±15% 330µs ±10% -44.01% (p=0.000 n=10+10) // // name old speed new speed delta // BoundsInt64/10240KiB 17.9GB/s ±13% 31.8GB/s ±11% +78.13% (p=0.000 n=10+10) // // As expected, since the functions are memory-bound in those cases, and load // half as much data, we see significant improvements. The gains are not 2x because // running more AVX-512 instructions in the tight loops causes more contention // on CPU ports. // // Optimizations being trade offs, using min/max functions independently appears // to yield better throughput when the data resides in CPU caches: // // name old time/op new time/op delta // BoundsInt64/4KiB 52.1ns ± 0% 46.2ns ± 1% -12.65% (p=0.000 n=10+10) // // name old speed new speed delta // BoundsInt64/4KiB 78.6GB/s ± 0% 88.6GB/s ± 1% +11.23% (p=0.000 n=10+10) // // The probable explanation is that in those cases the algorithms are not // memory-bound anymore, but limited by contention on CPU ports, and the // individual min/max functions are able to better parallelize the work due // to running less instructions per loop. The performance starts to equalize // around 256KiB, and degrade beyond 1MiB, so we use this threshold to determine // which approach to prefer. 
const combinedBoundsThreshold = 1 * 1024 * 1024 //go:noescape func combinedBoundsBool(data []bool) (min, max bool) //go:noescape func combinedBoundsInt32(data []int32) (min, max int32) //go:noescape func combinedBoundsInt64(data []int64) (min, max int64) //go:noescape func combinedBoundsUint32(data []uint32) (min, max uint32) //go:noescape func combinedBoundsUint64(data []uint64) (min, max uint64) //go:noescape func combinedBoundsFloat32(data []float32) (min, max float32) //go:noescape func combinedBoundsFloat64(data []float64) (min, max float64) //go:noescape func combinedBoundsBE128(data [][16]byte) (min, max []byte) func boundsInt32(data []int32) (min, max int32) { if 4*len(data) >= combinedBoundsThreshold { return combinedBoundsInt32(data) } min = minInt32(data) max = maxInt32(data) return } func boundsInt64(data []int64) (min, max int64) { if 8*len(data) >= combinedBoundsThreshold { return combinedBoundsInt64(data) } min = minInt64(data) max = maxInt64(data) return } func boundsUint32(data []uint32) (min, max uint32) { if 4*len(data) >= combinedBoundsThreshold { return combinedBoundsUint32(data) } min = minUint32(data) max = maxUint32(data) return } func boundsUint64(data []uint64) (min, max uint64) { if 8*len(data) >= combinedBoundsThreshold { return combinedBoundsUint64(data) } min = minUint64(data) max = maxUint64(data) return } func boundsFloat32(data []float32) (min, max float32) { if 4*len(data) >= combinedBoundsThreshold { return combinedBoundsFloat32(data) } min = minFloat32(data) max = maxFloat32(data) return } func boundsFloat64(data []float64) (min, max float64) { if 8*len(data) >= combinedBoundsThreshold { return combinedBoundsFloat64(data) } min = minFloat64(data) max = maxFloat64(data) return } func boundsBE128(data [][16]byte) (min, max []byte) { // TODO: min/max BE128 is really complex to vectorize, and the returns // were barely better than doing the min and max independently, for all // input sizes. 
We should revisit if we find ways to improve the min or // max algorithms which can be transposed to the combined version. min = minBE128(data) max = maxBE128(data) return } ================================================ FILE: page_bounds_amd64.s ================================================ //go:build !purego #include "textflag.h" #define bswap128lo 0x08080A0B0C0D0E0F #define bswap128hi 0x0001020304050607 DATA bswap128+0(SB)/8, $bswap128lo DATA bswap128+8(SB)/8, $bswap128hi DATA bswap128+16(SB)/8, $bswap128lo DATA bswap128+24(SB)/8, $bswap128hi DATA bswap128+32(SB)/8, $bswap128lo DATA bswap128+40(SB)/8, $bswap128hi DATA bswap128+48(SB)/8, $bswap128lo DATA bswap128+56(SB)/8, $bswap128hi GLOBL bswap128(SB), RODATA|NOPTR, $64 DATA indexes128+0(SB)/8, $0 DATA indexes128+8(SB)/8, $0 DATA indexes128+16(SB)/8, $1 DATA indexes128+24(SB)/8, $1 DATA indexes128+32(SB)/8, $2 DATA indexes128+40(SB)/8, $2 DATA indexes128+48(SB)/8, $3 DATA indexes128+56(SB)/8, $3 GLOBL indexes128(SB), RODATA|NOPTR, $64 DATA swap64+0(SB)/8, $4 DATA swap64+8(SB)/8, $5 DATA swap64+16(SB)/8, $6 DATA swap64+24(SB)/8, $7 DATA swap64+32(SB)/8, $2 DATA swap64+40(SB)/8, $3 DATA swap64+48(SB)/8, $0 DATA swap64+56(SB)/8, $1 GLOBL swap64(SB), RODATA|NOPTR, $64 DATA swap32+0(SB)/4, $8 DATA swap32+4(SB)/4, $9 DATA swap32+8(SB)/4, $10 DATA swap32+12(SB)/4, $11 DATA swap32+16(SB)/4, $12 DATA swap32+20(SB)/4, $13 DATA swap32+24(SB)/4, $14 DATA swap32+28(SB)/4, $15 DATA swap32+32(SB)/4, $4 DATA swap32+36(SB)/4, $5 DATA swap32+40(SB)/4, $6 DATA swap32+44(SB)/4, $7 DATA swap32+48(SB)/4, $2 DATA swap32+52(SB)/4, $3 DATA swap32+56(SB)/4, $0 DATA swap32+60(SB)/4, $1 GLOBL swap32(SB), RODATA|NOPTR, $64 // func combinedBoundsInt32(data []int32) (min, max int32) TEXT ·combinedBoundsInt32(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ R8, R8 XORQ R9, R9 CMPQ CX, $0 JE done XORQ SI, SI MOVLQZX (AX), R8 // min MOVLQZX (AX), R9 // max CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ 
CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTD (AX), Z0 VPBROADCASTD (AX), Z3 loop32: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VPMINSD Z1, Z0, Z0 VPMINSD Z2, Z0, Z0 VPMAXSD Z1, Z3, Z3 VPMAXSD Z2, Z3, Z3 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VMOVDQU32 swap32+0(SB), Z2 VPERMI2D Z0, Z0, Z1 VPERMI2D Z3, Z3, Z2 VPMINSD Y1, Y0, Y0 VPMAXSD Y2, Y3, Y3 VMOVDQU32 swap32+32(SB), Y1 VMOVDQU32 swap32+32(SB), Y2 VPERMI2D Y0, Y0, Y1 VPERMI2D Y3, Y3, Y2 VPMINSD X1, X0, X0 VPMAXSD X2, X3, X3 VMOVDQU32 swap32+48(SB), X1 VMOVDQU32 swap32+48(SB), X2 VPERMI2D X0, X0, X1 VPERMI2D X3, X3, X2 VPMINSD X1, X0, X0 VPMAXSD X2, X3, X3 VZEROUPPER MOVQ X0, BX MOVQ X3, DX MOVL BX, R8 MOVL DX, R9 SHRQ $32, BX SHRQ $32, DX CMPL BX, R8 CMOVLLT BX, R8 CMPL DX, R9 CMOVLGT DX, R9 CMPQ SI, CX JE done loop: MOVLQZX (AX)(SI*4), DX CMPL DX, R8 CMOVLLT DX, R8 CMPL DX, R9 CMOVLGT DX, R9 INCQ SI CMPQ SI, CX JNE loop done: MOVL R8, min+24(FP) MOVL R9, max+28(FP) RET // func combinedBoundsInt64(data []int64) (min, max int64) TEXT ·combinedBoundsInt64(SB), NOSPLIT, $-40 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ R8, R8 XORQ R9, R9 CMPQ CX, $0 JE done XORQ SI, SI MOVQ (AX), R8 // min MOVQ (AX), R9 // max CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $16 JB loop MOVQ CX, DI SHRQ $4, DI SHLQ $4, DI VPBROADCASTQ (AX), Z0 VPBROADCASTQ (AX), Z3 loop16: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VPMINSQ Z1, Z0, Z0 VPMINSQ Z2, Z0, Z0 VPMAXSQ Z1, Z3, Z3 VPMAXSQ Z2, Z3, Z3 ADDQ $16, SI CMPQ SI, DI JNE loop16 VMOVDQU32 swap32+0(SB), Z1 VMOVDQU32 swap32+0(SB), Z2 VPERMI2D Z0, Z0, Z1 VPERMI2D Z3, Z3, Z2 VPMINSQ Y1, Y0, Y0 VPMAXSQ Y2, Y3, Y3 VMOVDQU32 swap32+32(SB), Y1 VMOVDQU32 swap32+32(SB), Y2 VPERMI2D Y0, Y0, Y1 VPERMI2D Y3, Y3, Y2 VPMINSQ X1, X0, X0 VPMAXSQ X2, X3, X3 VMOVDQU32 swap32+48(SB), X1 VMOVDQU32 swap32+48(SB), X2 VPERMI2D X0, X0, X1 VPERMI2D X3, X3, X2 VPMINSQ X1, X0, X0 VPMAXSQ X2, X3, X3 VZEROUPPER MOVQ X0, R8 MOVQ X3, R9 CMPQ SI, CX JE done loop: MOVQ 
(AX)(SI*8), DX CMPQ DX, R8 CMOVQLT DX, R8 CMPQ DX, R9 CMOVQGT DX, R9 INCQ SI CMPQ SI, CX JNE loop done: MOVQ R8, min+24(FP) MOVQ R9, max+32(FP) RET // func combinedBoundsUint32(data []uint32) (min, max uint32) TEXT ·combinedBoundsUint32(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ R8, R8 XORQ R9, R9 CMPQ CX, $0 JE done XORQ SI, SI MOVLQZX (AX), R8 // min MOVLQZX (AX), R9 // max CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTD (AX), Z0 VPBROADCASTD (AX), Z3 loop32: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VPMINUD Z1, Z0, Z0 VPMINUD Z2, Z0, Z0 VPMAXUD Z1, Z3, Z3 VPMAXUD Z2, Z3, Z3 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VMOVDQU32 swap32+0(SB), Z2 VPERMI2D Z0, Z0, Z1 VPERMI2D Z3, Z3, Z2 VPMINUD Y1, Y0, Y0 VPMAXUD Y2, Y3, Y3 VMOVDQU32 swap32+32(SB), Y1 VMOVDQU32 swap32+32(SB), Y2 VPERMI2D Y0, Y0, Y1 VPERMI2D Y3, Y3, Y2 VPMINUD X1, X0, X0 VPMAXUD X2, X3, X3 VMOVDQU32 swap32+48(SB), X1 VMOVDQU32 swap32+48(SB), X2 VPERMI2D X0, X0, X1 VPERMI2D X3, X3, X2 VPMINUD X1, X0, X0 VPMAXUD X2, X3, X3 VZEROUPPER MOVQ X0, BX MOVQ X3, DX MOVL BX, R8 MOVL DX, R9 SHRQ $32, BX SHRQ $32, DX CMPL BX, R8 CMOVLCS BX, R8 CMPL DX, R9 CMOVLHI DX, R9 CMPQ SI, CX JE done loop: MOVLQZX (AX)(SI*4), DX CMPL DX, R8 CMOVLCS DX, R8 CMPL DX, R9 CMOVLHI DX, R9 INCQ SI CMPQ SI, CX JNE loop done: MOVL R8, min+24(FP) MOVL R9, max+28(FP) RET // func combinedBoundsUint64(data []uint64) (min, max uint64) TEXT ·combinedBoundsUint64(SB), NOSPLIT, $-40 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ R8, R8 XORQ R9, R9 CMPQ CX, $0 JE done XORQ SI, SI MOVQ (AX), R8 // min MOVQ (AX), R9 // max CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $16 JB loop MOVQ CX, DI SHRQ $4, DI SHLQ $4, DI VPBROADCASTQ (AX), Z0 VPBROADCASTQ (AX), Z3 loop16: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VPMINUQ Z1, Z0, Z0 VPMINUQ Z2, Z0, Z0 VPMAXUQ Z1, Z3, Z3 VPMAXUQ Z2, Z3, Z3 ADDQ $16, SI CMPQ SI, DI JNE loop16 
VMOVDQU32 swap32+0(SB), Z1 VMOVDQU32 swap32+0(SB), Z2 VPERMI2D Z0, Z0, Z1 VPERMI2D Z3, Z3, Z2 VPMINUQ Y1, Y0, Y0 VPMAXUQ Y2, Y3, Y3 VMOVDQU32 swap32+32(SB), Y1 VMOVDQU32 swap32+32(SB), Y2 VPERMI2D Y0, Y0, Y1 VPERMI2D Y3, Y3, Y2 VPMINUQ X1, X0, X0 VPMAXUQ X2, X3, X3 VMOVDQU32 swap32+48(SB), X1 VMOVDQU32 swap32+48(SB), X2 VPERMI2D X0, X0, X1 VPERMI2D X3, X3, X2 VPMINUQ X1, X0, X0 VPMAXUQ X2, X3, X3 VZEROUPPER MOVQ X0, R8 MOVQ X3, R9 CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX CMPQ DX, R8 CMOVQCS DX, R8 CMPQ DX, R9 CMOVQHI DX, R9 INCQ SI CMPQ SI, CX JNE loop done: MOVQ R8, min+24(FP) MOVQ R9, max+32(FP) RET // func combinedBoundsFloat32(data []float32) (min, max float32) TEXT ·combinedBoundsFloat32(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ R8, R8 XORQ R9, R9 CMPQ CX, $0 JE done XORPS X0, X0 XORPS X1, X1 XORQ SI, SI MOVLQZX (AX), R8 // min MOVLQZX (AX), R9 // max MOVQ R8, X0 MOVQ R9, X1 CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTD (AX), Z0 VPBROADCASTD (AX), Z3 loop32: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VMINPS Z1, Z0, Z0 VMINPS Z2, Z0, Z0 VMAXPS Z1, Z3, Z3 VMAXPS Z2, Z3, Z3 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VMOVDQU32 swap32+0(SB), Z2 VPERMI2D Z0, Z0, Z1 VPERMI2D Z3, Z3, Z2 VMINPS Y1, Y0, Y0 VMAXPS Y2, Y3, Y3 VMOVDQU32 swap32+32(SB), Y1 VMOVDQU32 swap32+32(SB), Y2 VPERMI2D Y0, Y0, Y1 VPERMI2D Y3, Y3, Y2 VMINPS X1, X0, X0 VMAXPS X2, X3, X3 VMOVDQU32 swap32+48(SB), X1 VMOVDQU32 swap32+48(SB), X2 VPERMI2D X0, X0, X1 VPERMI2D X3, X3, X2 VMINPS X1, X0, X0 VMAXPS X2, X3, X3 VZEROUPPER MOVAPS X0, X1 MOVAPS X3, X2 PSRLQ $32, X1 MOVQ X0, R8 MOVQ X1, R10 UCOMISS X0, X1 CMOVLCS R10, R8 PSRLQ $32, X2 MOVQ X3, R9 MOVQ X2, R11 UCOMISS X3, X2 CMOVLHI R11, R9 CMPQ SI, CX JE done MOVQ R8, X0 MOVQ R9, X1 loop: MOVLQZX (AX)(SI*4), DX MOVQ DX, X2 UCOMISS X0, X2 CMOVLCS DX, R8 UCOMISS X1, X2 CMOVLHI DX, R9 MOVQ R8, X0 MOVQ R9, X1 INCQ SI CMPQ SI, CX JNE 
loop done: MOVL R8, min+24(FP) MOVL R9, max+28(FP) RET // func combinedBoundsFloat64(data []float64) (min, max float64) TEXT ·combinedBoundsFloat64(SB), NOSPLIT, $-40 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ R8, R8 XORQ R9, R9 CMPQ CX, $0 JE done XORPD X0, X0 XORPD X1, X1 XORQ SI, SI MOVQ (AX), R8 // min MOVQ (AX), R9 // max MOVQ R8, X0 MOVQ R9, X1 CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $16 JB loop MOVQ CX, DI SHRQ $4, DI SHLQ $4, DI VPBROADCASTQ (AX), Z0 VPBROADCASTQ (AX), Z3 loop16: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMINPD Z1, Z0, Z0 VMINPD Z2, Z0, Z0 VMAXPD Z1, Z3, Z3 VMAXPD Z2, Z3, Z3 ADDQ $16, SI CMPQ SI, DI JNE loop16 VMOVDQU64 swap32+0(SB), Z1 VMOVDQU64 swap32+0(SB), Z2 VPERMI2D Z0, Z0, Z1 VPERMI2D Z3, Z3, Z2 VMINPD Y1, Y0, Y0 VMAXPD Y2, Y3, Y3 VMOVDQU64 swap32+32(SB), Y1 VMOVDQU64 swap32+32(SB), Y2 VPERMI2D Y0, Y0, Y1 VPERMI2D Y3, Y3, Y2 VMINPD X1, X0, X0 VMAXPD X2, X3, X3 VMOVDQU64 swap32+48(SB), X1 VMOVDQU64 swap32+48(SB), X2 VPERMI2D X0, X0, X1 VPERMI2D X3, X3, X2 VMINPD X1, X0, X0 VMAXPD X2, X3, X1 VZEROUPPER MOVQ X0, R8 MOVQ X1, R9 CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX MOVQ DX, X2 UCOMISD X0, X2 CMOVQCS DX, R8 UCOMISD X1, X2 CMOVQHI DX, R9 MOVQ R8, X0 MOVQ R9, X1 INCQ SI CMPQ SI, CX JNE loop done: MOVQ R8, min+24(FP) MOVQ R9, max+32(FP) RET ================================================ FILE: page_bounds_purego.go ================================================ //go:build purego || !amd64 package parquet import ( "encoding/binary" ) func boundsInt32(data []int32) (min, max int32) { if len(data) > 0 { min = data[0] max = data[0] for _, v := range data[1:] { if v < min { min = v } if v > max { max = v } } } return min, max } func boundsInt64(data []int64) (min, max int64) { if len(data) > 0 { min = data[0] max = data[0] for _, v := range data[1:] { if v < min { min = v } if v > max { max = v } } } return min, max } func boundsUint32(data []uint32) (min, max uint32) { if len(data) > 0 { min = data[0] max = 
data[0] for _, v := range data[1:] { if v < min { min = v } if v > max { max = v } } } return min, max } func boundsUint64(data []uint64) (min, max uint64) { if len(data) > 0 { min = data[0] max = data[0] for _, v := range data[1:] { if v < min { min = v } if v > max { max = v } } } return min, max } func boundsFloat32(data []float32) (min, max float32) { if len(data) > 0 { min = data[0] max = data[0] for _, v := range data[1:] { if v < min { min = v } if v > max { max = v } } } return min, max } func boundsFloat64(data []float64) (min, max float64) { if len(data) > 0 { min = data[0] max = data[0] for _, v := range data[1:] { if v < min { min = v } if v > max { max = v } } } return min, max } func boundsBE128(data [][16]byte) (min, max []byte) { if len(data) > 0 { minHi := binary.BigEndian.Uint64(data[0][:8]) maxHi := minHi minIndex := 0 maxIndex := 0 for i := 1; i < len(data); i++ { hi := binary.BigEndian.Uint64(data[i][:8]) lo := binary.BigEndian.Uint64(data[i][8:]) switch { case hi < minHi: minHi, minIndex = hi, i case hi == minHi: minLo := binary.BigEndian.Uint64(data[minIndex][8:]) if lo < minLo { minHi, minIndex = hi, i } } switch { case hi > maxHi: maxHi, maxIndex = hi, i case hi == maxHi: maxLo := binary.BigEndian.Uint64(data[maxIndex][8:]) if lo > maxLo { maxHi, maxIndex = hi, i } } } min = data[minIndex][:] max = data[maxIndex][:] } return min, max } ================================================ FILE: page_bounds_test.go ================================================ package parquet import ( "bytes" "fmt" "math/rand" "reflect" "testing" "github.com/segmentio/parquet-go/internal/quick" ) var benchmarkBufferSizes = [...]int{ 4 * 1024, 256 * 1024, 2048 * 1024, } func forEachBenchmarkBufferSize(b *testing.B, f func(*testing.B, int)) { for _, bufferSize := range benchmarkBufferSizes { b.Run(fmt.Sprintf("%dKiB", bufferSize/1024), func(b *testing.B) { b.SetBytes(int64(bufferSize)) f(b, bufferSize) }) } } func TestBoundsInt32(t *testing.T) { err := 
quick.Check(func(values []int32) bool { min := int32(0) max := int32(0) if len(values) > 0 { min = values[0] max = values[0] for _, v := range values[1:] { if v < min { min = v } if v > max { max = v } } } minValue, maxValue := boundsInt32(values) return min == minValue && max == maxValue }) if err != nil { t.Error(err) } } func TestBoundsInt64(t *testing.T) { err := quick.Check(func(values []int64) bool { min := int64(0) max := int64(0) if len(values) > 0 { min = values[0] max = values[0] for _, v := range values[1:] { if v < min { min = v } if v > max { max = v } } } minValue, maxValue := boundsInt64(values) return min == minValue && max == maxValue }) if err != nil { t.Error(err) } } func TestBoundsUint32(t *testing.T) { err := quick.Check(func(values []uint32) bool { min := uint32(0) max := uint32(0) if len(values) > 0 { min = values[0] max = values[0] for _, v := range values[1:] { if v < min { min = v } if v > max { max = v } } } minValue, maxValue := boundsUint32(values) return min == minValue && max == maxValue }) if err != nil { t.Error(err) } } func TestBoundsUint64(t *testing.T) { err := quick.Check(func(values []uint64) bool { min := uint64(0) max := uint64(0) if len(values) > 0 { min = values[0] max = values[0] for _, v := range values[1:] { if v < min { min = v } if v > max { max = v } } } minValue, maxValue := boundsUint64(values) return min == minValue && max == maxValue }) if err != nil { t.Error(err) } } func TestBoundsFloat32(t *testing.T) { err := quick.Check(func(values []float32) bool { min := float32(0) max := float32(0) if len(values) > 0 { min = values[0] max = values[0] for _, v := range values[1:] { if v < min { min = v } if v > max { max = v } } } minValue, maxValue := boundsFloat32(values) return min == minValue && max == maxValue }) if err != nil { t.Error(err) } } func TestBoundsFloat64(t *testing.T) { err := quick.Check(func(values []float64) bool { min := float64(0) max := float64(0) if len(values) > 0 { min = values[0] max = 
values[0] for _, v := range values[1:] { if v < min { min = v } if v > max { max = v } } } minValue, maxValue := boundsFloat64(values) return min == minValue && max == maxValue }) if err != nil { t.Error(err) } } func TestBE128MinMaxSimilar(t *testing.T) { var min [16]byte // Test values: // [1 1 ... 1 1] // [0 1 ... 1 1] // ... // [0 0 ... 0 1] // [0 0 ... 0 0] for i := 0; i < 17; i++ { var max [16]byte for j := i; j < 16; j++ { max[j] = 1 } testBE182MinMaxPerm(t, min, max) } // Test values: // [0 0 ... 0 0] // [1 0 ... 0 0] // ... // [1 1 ... 1 0] // [1 1 ... 1 1] for i := 0; i < 17; i++ { var max [16]byte for j := 0; j < i; j++ { max[j] = 1 } testBE182MinMaxPerm(t, min, max) } } func testBE182MinMaxPerm(t *testing.T, min, max [16]byte) { testBE128MinMax(t, min[:], max[:], [][16]byte{min, max}) testBE128MinMax(t, min[:], max[:], [][16]byte{max, min}) } func testBE128MinMax(t *testing.T, min, max []byte, data [][16]byte) { bmin := minBE128(data) if !reflect.DeepEqual(bmin, min[:]) { t.Errorf("unexpected min value\nexpected %v\n got %v", min, bmin) } bmax := maxBE128(data) if !reflect.DeepEqual(bmax, max[:]) { t.Errorf("unexpected max value\nexpected %v\n got %v", max, bmax) } } func TestBoundsBE128(t *testing.T) { err := quick.Check(func(values [][16]byte) bool { min := [16]byte{} max := [16]byte{} if len(values) > 0 { min = values[0] max = values[0] for _, v := range values[1:] { if bytes.Compare(v[:], min[:]) < 0 { min = v } if bytes.Compare(v[:], max[:]) > 0 { max = v } } } minValue, maxValue := boundsBE128(values) return (len(values) == 0 && minValue == nil && maxValue == nil) || (bytes.Equal(min[:], minValue) && bytes.Equal(max[:], maxValue)) }) if err != nil { t.Error(err) } } func TestBoundsFixedLenByteArray(t *testing.T) { err := quick.Check(func(values []byte) bool { min := [1]byte{} max := [1]byte{} if len(values) > 0 { min[0] = values[0] max[0] = values[0] for _, v := range values[1:] { if v < min[0] { min[0] = v } if v > max[0] { max[0] = v } } } 
minValue, maxValue := boundsFixedLenByteArray(values, 1) return (len(values) == 0 && minValue == nil && maxValue == nil) || (bytes.Equal(min[:], minValue) && bytes.Equal(max[:], maxValue)) }) if err != nil { t.Error(err) } } func BenchmarkBoundsInt32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]int32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Int31() } for i := 0; i < b.N; i++ { boundsInt32(values) } }) } func BenchmarkBoundsInt64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]int64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Int63() } for i := 0; i < b.N; i++ { boundsInt64(values) } }) } func BenchmarkBoundsUint32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]uint32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Uint32() } for i := 0; i < b.N; i++ { boundsUint32(values) } }) } func BenchmarkBoundsUint64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]uint64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Uint64() } for i := 0; i < b.N; i++ { boundsUint64(values) } }) } func BenchmarkBoundsFloat32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]float32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Float32() } for i := 0; i < b.N; i++ { boundsFloat32(values) } }) } func BenchmarkBoundsFloat64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]float64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Float64() } for i := 0; i < b.N; i++ { boundsFloat64(values) } }) } func 
BenchmarkBoundsBE128(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([][16]byte, bufferSize) prng := rand.New(rand.NewSource(1)) for i := range values { prng.Read(values[i][:]) } for i := 0; i < b.N; i++ { boundsBE128(values) } }) } func BenchmarkBoundsFixedLenByteArray(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]byte, bufferSize) prng := rand.New(rand.NewSource(1)) prng.Read(values) for i := 0; i < b.N; i++ { boundsFixedLenByteArray(values, 32) } }) } ================================================ FILE: page_header.go ================================================ package parquet import ( "fmt" "github.com/segmentio/parquet-go/format" ) // PageHeader is an interface implemented by parquet page headers. type PageHeader interface { // Returns the number of values in the page (including nulls). NumValues() int64 // Returns the page encoding. Encoding() format.Encoding // Returns the parquet format page type. PageType() format.PageType } // DataPageHeader is a specialization of the PageHeader interface implemented by // data pages. type DataPageHeader interface { PageHeader // Returns the encoding of the repetition level section. RepetitionLevelEncoding() format.Encoding // Returns the encoding of the definition level section. DefinitionLevelEncoding() format.Encoding // Returns the number of null values in the page. NullCount() int64 // Returns the minimum value in the page based on the ordering rules of the // column's logical type. // // As an optimization, the method may return the same slice across multiple // calls. Programs must treat the returned value as immutable to prevent // unpredictable behaviors. // // If the page only contains only null values, an empty slice is returned. MinValue() []byte // Returns the maximum value in the page based on the ordering rules of the // column's logical type. 
// // As an optimization, the method may return the same slice across multiple // calls. Programs must treat the returned value as immutable to prevent // unpredictable behaviors. // // If the page only contains only null values, an empty slice is returned. MaxValue() []byte } // DictionaryPageHeader is an implementation of the PageHeader interface // representing dictionary pages. type DictionaryPageHeader struct { header *format.DictionaryPageHeader } func (dict DictionaryPageHeader) NumValues() int64 { return int64(dict.header.NumValues) } func (dict DictionaryPageHeader) Encoding() format.Encoding { return dict.header.Encoding } func (dict DictionaryPageHeader) PageType() format.PageType { return format.DictionaryPage } func (dict DictionaryPageHeader) IsSorted() bool { return dict.header.IsSorted } func (dict DictionaryPageHeader) String() string { return fmt.Sprintf("DICTIONARY_PAGE_HEADER{NumValues=%d,Encoding=%s,IsSorted=%t}", dict.header.NumValues, dict.header.Encoding, dict.header.IsSorted) } // DataPageHeaderV1 is an implementation of the DataPageHeader interface // representing data pages version 1. 
type DataPageHeaderV1 struct { header *format.DataPageHeader } func (v1 DataPageHeaderV1) NumValues() int64 { return int64(v1.header.NumValues) } func (v1 DataPageHeaderV1) RepetitionLevelEncoding() format.Encoding { return v1.header.RepetitionLevelEncoding } func (v1 DataPageHeaderV1) DefinitionLevelEncoding() format.Encoding { return v1.header.DefinitionLevelEncoding } func (v1 DataPageHeaderV1) Encoding() format.Encoding { return v1.header.Encoding } func (v1 DataPageHeaderV1) PageType() format.PageType { return format.DataPage } func (v1 DataPageHeaderV1) NullCount() int64 { return v1.header.Statistics.NullCount } func (v1 DataPageHeaderV1) MinValue() []byte { return v1.header.Statistics.MinValue } func (v1 DataPageHeaderV1) MaxValue() []byte { return v1.header.Statistics.MaxValue } func (v1 DataPageHeaderV1) String() string { return fmt.Sprintf("DATA_PAGE_HEADER{NumValues=%d,Encoding=%s}", v1.header.NumValues, v1.header.Encoding) } // DataPageHeaderV2 is an implementation of the DataPageHeader interface // representing data pages version 2. 
type DataPageHeaderV2 struct { header *format.DataPageHeaderV2 } func (v2 DataPageHeaderV2) NumValues() int64 { return int64(v2.header.NumValues) } func (v2 DataPageHeaderV2) NumNulls() int64 { return int64(v2.header.NumNulls) } func (v2 DataPageHeaderV2) NumRows() int64 { return int64(v2.header.NumRows) } func (v2 DataPageHeaderV2) RepetitionLevelsByteLength() int64 { return int64(v2.header.RepetitionLevelsByteLength) } func (v2 DataPageHeaderV2) DefinitionLevelsByteLength() int64 { return int64(v2.header.DefinitionLevelsByteLength) } func (v2 DataPageHeaderV2) RepetitionLevelEncoding() format.Encoding { return format.RLE } func (v2 DataPageHeaderV2) DefinitionLevelEncoding() format.Encoding { return format.RLE } func (v2 DataPageHeaderV2) Encoding() format.Encoding { return v2.header.Encoding } func (v2 DataPageHeaderV2) PageType() format.PageType { return format.DataPageV2 } func (v2 DataPageHeaderV2) NullCount() int64 { return v2.header.Statistics.NullCount } func (v2 DataPageHeaderV2) MinValue() []byte { return v2.header.Statistics.MinValue } func (v2 DataPageHeaderV2) MaxValue() []byte { return v2.header.Statistics.MaxValue } func (v2 DataPageHeaderV2) IsCompressed() bool { return v2.header.IsCompressed == nil || *v2.header.IsCompressed } func (v2 DataPageHeaderV2) String() string { return fmt.Sprintf("DATA_PAGE_HEADER_V2{NumValues=%d,NumNulls=%d,NumRows=%d,Encoding=%s,IsCompressed=%t}", v2.header.NumValues, v2.header.NumNulls, v2.header.NumRows, v2.header.Encoding, v2.IsCompressed()) } type unknownPageHeader struct { header *format.PageHeader } func (u unknownPageHeader) NumValues() int64 { return 0 } func (u unknownPageHeader) Encoding() format.Encoding { return -1 } func (u unknownPageHeader) PageType() format.PageType { return u.header.Type } func (u unknownPageHeader) String() string { return fmt.Sprintf("UNKNOWN_PAGE_HEADER{Type=%d}", u.header.Type) } var ( _ PageHeader = DictionaryPageHeader{} _ DataPageHeader = DataPageHeaderV1{} _ DataPageHeader = 
DataPageHeaderV2{} _ PageHeader = unknownPageHeader{} ) ================================================ FILE: page_max.go ================================================ package parquet import ( "bytes" ) func maxFixedLenByteArray(data []byte, size int) (max []byte) { if len(data) > 0 { max = data[:size] for i, j := size, 2*size; j <= len(data); { item := data[i:j] if bytes.Compare(item, max) > 0 { max = item } i += size j += size } } return max } ================================================ FILE: page_max_amd64.go ================================================ //go:build !purego package parquet //go:noescape func maxInt32(data []int32) int32 //go:noescape func maxInt64(data []int64) int64 //go:noescape func maxUint32(data []uint32) uint32 //go:noescape func maxUint64(data []uint64) uint64 //go:noescape func maxFloat32(data []float32) float32 //go:noescape func maxFloat64(data []float64) float64 //go:noescape func maxBE128(data [][16]byte) []byte ================================================ FILE: page_max_amd64.s ================================================ //go:build !purego #include "textflag.h" // func maxInt32(data []int32) int32 TEXT ·maxInt32(SB), NOSPLIT, $-28 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVLQZX (AX), BX CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTD (AX), Z0 loop32: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VPMAXSD Z1, Z0, Z0 VPMAXSD Z2, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMAXSD Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMAXSD X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMAXSD X1, X0, X0 VZEROUPPER MOVQ X0, DX MOVL DX, BX SHRQ $32, DX CMPL DX, BX CMOVLGT DX, BX CMPQ SI, CX JE done loop: MOVLQZX (AX)(SI*4), DX CMPL DX, BX CMOVLGT DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVL BX, ret+24(FP) RET // func maxInt64(data []int64) 
int64 TEXT ·maxInt64(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVQ (AX), BX CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTQ (AX), Z0 loop32: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMOVDQU64 128(AX)(SI*8), Z3 VMOVDQU64 192(AX)(SI*8), Z4 VPMAXSQ Z1, Z2, Z5 VPMAXSQ Z3, Z4, Z6 VPMAXSQ Z5, Z6, Z1 VPMAXSQ Z1, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMAXSQ Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMAXSQ X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMAXSQ X1, X0, X0 VZEROUPPER MOVQ X0, BX CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX CMPQ DX, BX CMOVQGT DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVQ BX, ret+24(FP) RET // func maxUint32(data []int32) int32 TEXT ·maxUint32(SB), NOSPLIT, $-28 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVLQZX (AX), BX CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTD (AX), Z0 loop32: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VPMAXUD Z1, Z0, Z0 VPMAXUD Z2, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMAXUD Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMAXUD X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMAXUD X1, X0, X0 VZEROUPPER MOVQ X0, DX MOVL DX, BX SHRQ $32, DX CMPL DX, BX CMOVLHI DX, BX CMPQ SI, CX JE done loop: MOVLQZX (AX)(SI*4), DX CMPL DX, BX CMOVLHI DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVL BX, ret+24(FP) RET // func maxUint64(data []uint64) uint64 TEXT ·maxUint64(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVQ (AX), BX CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTQ (AX), Z0 loop32: VMOVDQU64 
(AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMOVDQU64 128(AX)(SI*8), Z3 VMOVDQU64 192(AX)(SI*8), Z4 VPMAXUQ Z1, Z2, Z5 VPMAXUQ Z3, Z4, Z6 VPMAXUQ Z5, Z6, Z1 VPMAXUQ Z1, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMAXUQ Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMAXUQ X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMAXUQ X1, X0, X0 VZEROUPPER MOVQ X0, BX CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX CMPQ DX, BX CMOVQHI DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVQ BX, ret+24(FP) RET // func maxFloat32(data []float32) float32 TEXT ·maxFloat32(SB), NOSPLIT, $-28 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORPS X0, X0 XORPS X1, X1 XORQ SI, SI MOVLQZX (AX), BX MOVQ BX, X0 CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $64 JB loop MOVQ CX, DI SHRQ $6, DI SHLQ $6, DI VPBROADCASTD (AX), Z0 loop64: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VMOVDQU32 128(AX)(SI*4), Z3 VMOVDQU32 192(AX)(SI*4), Z4 VMAXPS Z1, Z2, Z5 VMAXPS Z3, Z4, Z6 VMAXPS Z5, Z6, Z1 VMAXPS Z1, Z0, Z0 ADDQ $64, SI CMPQ SI, DI JNE loop64 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VMAXPS Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VMAXPS X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VMAXPS X1, X0, X0 VZEROUPPER MOVAPS X0, X1 PSRLQ $32, X1 MOVQ X0, BX MOVQ X1, DX UCOMISS X0, X1 CMOVLHI DX, BX CMPQ SI, CX JE done MOVQ BX, X0 loop: MOVLQZX (AX)(SI*4), DX MOVQ DX, X1 UCOMISS X0, X1 CMOVLHI DX, BX MOVQ BX, X0 INCQ SI CMPQ SI, CX JNE loop done: MOVL BX, ret+24(FP) RET // func maxFloat64(data []float64) float64 TEXT ·maxFloat64(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORPD X0, X0 XORPD X1, X1 XORQ SI, SI MOVQ (AX), BX MOVQ BX, X0 CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTQ (AX), Z0 loop32: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMOVDQU64 
128(AX)(SI*8), Z3 VMOVDQU64 192(AX)(SI*8), Z4 VMAXPD Z1, Z2, Z5 VMAXPD Z3, Z4, Z6 VMAXPD Z5, Z6, Z1 VMAXPD Z1, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU64 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VMAXPD Y1, Y0, Y0 VMOVDQU64 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VMAXPD X1, X0, X0 VMOVDQU64 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VMAXPD X1, X0, X0 VZEROUPPER MOVQ X0, BX CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX MOVQ DX, X1 UCOMISD X0, X1 CMOVQHI DX, BX MOVQ BX, X0 INCQ SI CMPQ SI, CX JNE loop done: MOVQ BX, ret+24(FP) RET // vpmaxu128 is a macro comparing unsigned 128 bits values held in the // `srcValues` and `maxValues` vectors. The `srcIndexes` and `maxIndexes` // vectors contain the indexes of elements in the value vectors. Remaining // K and R arguments are mask and general purpose registers needed to hold // temporary values during the computation. The last M argument is a mask // generated by vpmaxu128mask. // // The routine uses AVX-512 instructions (VPCMPUQ, VPBLENDMQ) to implement // the comparison of 128 bits values. The values are expected to be stored // in the vectors as a little-endian pair of two consecutive quad words. // // The results are written to the `maxValues` and `maxIndexes` vectors, // overwriting the inputs. `srcValues` and `srcIndexes` are read-only // parameters. // // At a high level, for two pairs of quad words formaxg two 128 bits values // A and B, the test implemented by this macro is: // // A[1] > B[1] || (A[1] == B[1] && A[0] > B[0]) // // Values in the source vector that evaluate to true on this expression are // written to the vector of maximum values, and their indexes are written to // the vector of indexes. 
#define vpmaxu128(srcValues, srcIndexes, maxValues, maxIndexes, K1, K2, R1, R2, R3, M) \ VPCMPUQ $0, maxValues, srcValues, K1 \ VPCMPUQ $6, maxValues, srcValues, K2 \ KMOVB K1, R1 \ KMOVB K2, R2 \ MOVB R2, R3 \ SHLB $1, R3 \ ANDB R3, R1 \ ORB R2, R1 \ ANDB M, R1 \ MOVB R1, R2 \ SHRB $1, R2 \ ORB R2, R1 \ KMOVB R1, K1 \ VPBLENDMQ srcValues, maxValues, K1, maxValues \ VPBLENDMQ srcIndexes, maxIndexes, K1, maxIndexes // vpmaxu128mask is a macro used to initialize the mask passed as last argument // to vpmaxu128. The argument M is intended to be a general purpose register. // // The bit mask is used to merge the results of the "greater than" and "equal" // comparison that are performed on each lane of maximum vectors. The upper bits // are used to compute results of the operation to determine which of the pairs // of quad words representing the 128 bits elements are the maximums. #define vpmaxu128mask(M) MOVB $0b10101010, M // func maxBE128(data [][16]byte) []byte TEXT ·maxBE128(SB), NOSPLIT, $-48 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX CMPQ CX, $0 JE null SHLQ $4, CX MOVQ CX, DX // len MOVQ AX, BX // max ADDQ AX, CX // end CMPQ DX, $256 JB loop CMPB ·hasAVX512MinMaxBE128(SB), $0 JE loop // Z19 holds a vector of the count by which we increment the vectors of // swap at each loop iteration. MOVQ $16, DI VPBROADCASTQ DI, Z19 // Z31 holds the shuffle mask used to convert 128 bits elements from big to // little endian so we can apply vectorized comparison instructions. VMOVDQU64 bswap128(SB), Z31 // These vectors hold four lanes of maximum values found in the input. VBROADCASTI64X2 (AX), Z0 VPSHUFB Z31, Z0, Z0 VMOVDQU64 Z0, Z5 VMOVDQU64 Z0, Z10 VMOVDQU64 Z0, Z15 // These vectors hold four lanes of swap of maximum values. // // We initialize them at zero because we broadcast the first value of the // input in the vectors that track the maximums of each lane; in other // words, we assume the maximum value is at the first offset and work our // way up from there. 
VPXORQ Z2, Z2, Z2 VPXORQ Z7, Z7, Z7 VPXORQ Z12, Z12, Z12 VPXORQ Z17, Z17, Z17 // These vectors are used to compute the swap of maximum values held // in [Z1, Z5, Z10, Z15]. Each vector holds a contiguous sequence of // swap; for example, Z3 is initialized with [0, 1, 2, 3]. At each // loop iteration, the swap are incremented by the number of elements // consumed from the input (4x4=16). VMOVDQU64 indexes128(SB), Z3 VPXORQ Z8, Z8, Z8 VPXORQ Z13, Z13, Z13 VPXORQ Z18, Z18, Z18 MOVQ $4, DI VPBROADCASTQ DI, Z1 VPADDQ Z1, Z3, Z8 VPADDQ Z1, Z8, Z13 VPADDQ Z1, Z13, Z18 // This bit mask is used to merge the results of the "less than" and "equal" // comparison that we perform on each lane of maximum vectors. We use the // upper bits to compute four results of the operation which determines // which of the pair of quad words representing the 128 bits elements is the // maximum. vpmaxu128mask(DI) SHRQ $8, DX SHLQ $8, DX ADDQ AX, DX loop16: // Compute 4x4 maximum values in vector registers, along with their swap // in the input array. VMOVDQU64 (AX), Z1 VMOVDQU64 64(AX), Z6 VMOVDQU64 128(AX), Z11 VMOVDQU64 192(AX), Z16 VPSHUFB Z31, Z1, Z1 VPSHUFB Z31, Z6, Z6 VPSHUFB Z31, Z11, Z11 VPSHUFB Z31, Z16, Z16 vpmaxu128(Z1, Z3, Z0, Z2, K1, K2, R8, R9, R10, DI) vpmaxu128(Z6, Z8, Z5, Z7, K3, K4, R11, R12, R13, DI) vpmaxu128(Z11, Z13, Z10, Z12, K1, K2, R8, R9, R10, DI) vpmaxu128(Z16, Z18, Z15, Z17, K3, K4, R11, R12, R13, DI) VPADDQ Z19, Z3, Z3 VPADDQ Z19, Z8, Z8 VPADDQ Z19, Z13, Z13 VPADDQ Z19, Z18, Z18 ADDQ $256, AX CMPQ AX, DX JB loop16 // After the loop completed, we need to merge the lanes that each contain // 4 maximum values (so 16 total candidate at this stage). The results are // reduced into 4 candidates in Z0, with their swap in Z2. 
vpmaxu128(Z10, Z12, Z0, Z2, K1, K2, R8, R9, R10, DI) vpmaxu128(Z15, Z17, Z5, Z7, K3, K4, R11, R12, R13, DI) vpmaxu128(Z5, Z7, Z0, Z2, K1, K2, R8, R9, R10, DI) // Further reduce the results by swapping the upper and lower parts of the // vector registers, and comparing them to determaxe which values are the // smallest. We compare 2x2 values at this step, then 2x1 values at the next // to find the index of the maximum. VMOVDQU64 swap64+0(SB), Z1 VMOVDQU64 swap64+0(SB), Z3 VPERMI2Q Z0, Z0, Z1 VPERMI2Q Z2, Z2, Z3 vpmaxu128(Y1, Y3, Y0, Y2, K1, K2, R8, R9, R10, DI) VMOVDQU64 swap64+32(SB), Y1 VMOVDQU64 swap64+32(SB), Y3 VPERMI2Q Y0, Y0, Y1 VPERMI2Q Y2, Y2, Y3 vpmaxu128(X1, X3, X0, X2, K1, K2, R8, R9, R10, DI) VZEROUPPER // Extract the index of the maximum value computed in the lower 64 bits of // X2 and position the BX pointer at the index of the maximum value. MOVQ X2, DX SHLQ $4, DX ADDQ DX, BX CMPQ AX, CX JE done // Unless the input was aligned on 256 bytes, we need to perform a few more // iterations on the remaining elements. // // This loop is also taken if the CPU has no support for AVX-512. loop: MOVQ (AX), R8 MOVQ (BX), R9 BSWAPQ R8 BSWAPQ R9 CMPQ R8, R9 JA more JB next MOVQ 8(AX), R8 MOVQ 8(BX), R9 BSWAPQ R8 BSWAPQ R9 CMPQ R8, R9 JBE next more: MOVQ AX, BX next: ADDQ $16, AX CMPQ AX, CX JB loop done: MOVQ BX, ret_base+24(FP) MOVQ $16, ret_len+32(FP) MOVQ $16, ret_cap+40(FP) RET null: XORQ BX, BX MOVQ BX, ret_base+24(FP) MOVQ BX, ret_len+32(FP) MOVQ BX, ret_cap+40(FP) RET ================================================ FILE: page_max_purego.go ================================================ //go:build purego || !amd64 package parquet import "encoding/binary" // ----------------------------------------------------------------------------- // TODO: use generics versions of the these functions to reduce the amount of // code to maintain when we drop compatilibty with Go version older than 1.18. 
// -----------------------------------------------------------------------------

// maxInt32 returns the largest value in data, or zero when data is empty.
func maxInt32(data []int32) (max int32) {
	if len(data) > 0 {
		max = data[0]
		for _, value := range data {
			if value > max {
				max = value
			}
		}
	}
	return max
}

// maxInt64 returns the largest value in data, or zero when data is empty.
func maxInt64(data []int64) (max int64) {
	if len(data) > 0 {
		max = data[0]
		for _, value := range data {
			if value > max {
				max = value
			}
		}
	}
	return max
}

// maxUint32 returns the largest value in data, or zero when data is empty.
func maxUint32(data []uint32) (max uint32) {
	if len(data) > 0 {
		max = data[0]
		for _, value := range data {
			if value > max {
				max = value
			}
		}
	}
	return max
}

// maxUint64 returns the largest value in data, or zero when data is empty.
func maxUint64(data []uint64) (max uint64) {
	if len(data) > 0 {
		max = data[0]
		for _, value := range data {
			if value > max {
				max = value
			}
		}
	}
	return max
}

// maxFloat32 returns the largest value in data, or zero when data is empty.
// NaN values never compare greater, so they are only returned when data[0]
// is NaN and no later value compares greater.
func maxFloat32(data []float32) (max float32) {
	if len(data) > 0 {
		max = data[0]
		for _, value := range data {
			if value > max {
				max = value
			}
		}
	}
	return max
}

// maxFloat64 returns the largest value in data, or zero when data is empty.
// See maxFloat32 for the treatment of NaN values.
func maxFloat64(data []float64) (max float64) {
	if len(data) > 0 {
		max = data[0]
		for _, value := range data {
			if value > max {
				max = value
			}
		}
	}
	return max
}

// maxBE128 returns the largest 16 byte big-endian value in data, or nil when
// data is empty. The returned slice aliases the winning element of data.
//
// NOTE: the named return was previously `min`, a copy-paste leftover from the
// minBE128 variant; it is renamed to `max` for clarity (behavior unchanged).
func maxBE128(data [][16]byte) (max []byte) {
	if len(data) > 0 {
		m := binary.BigEndian.Uint64(data[0][:8])
		j := 0
		for i := 1; i < len(data); i++ {
			x := binary.BigEndian.Uint64(data[i][:8])
			switch {
			case x > m:
				m, j = x, i
			case x == m:
				// Upper 64 bits are equal; break the tie on the lower 64 bits.
				y := binary.BigEndian.Uint64(data[i][8:])
				n := binary.BigEndian.Uint64(data[j][8:])
				if y > n {
					m, j = x, i
				}
			}
		}
		max = data[j][:]
	}
	return max
}
= values[0] for _, v := range values[1:] { if v > max { max = v } } } return max == maxInt64(values) }) if err != nil { t.Error(err) } } func TestMaxUint32(t *testing.T) { err := quick.Check(func(values []uint32) bool { max := uint32(0) if len(values) > 0 { max = values[0] for _, v := range values[1:] { if v > max { max = v } } } return max == maxUint32(values) }) if err != nil { t.Error(err) } } func TestMaxUint64(t *testing.T) { err := quick.Check(func(values []uint64) bool { max := uint64(0) if len(values) > 0 { max = values[0] for _, v := range values[1:] { if v > max { max = v } } } return max == maxUint64(values) }) if err != nil { t.Error(err) } } func TestMaxFloat32(t *testing.T) { err := quick.Check(func(values []float32) bool { max := float32(0) if len(values) > 0 { max = values[0] for _, v := range values[1:] { if v > max { max = v } } } return max == maxFloat32(values) }) if err != nil { t.Error(err) } } func TestMaxFloat64(t *testing.T) { err := quick.Check(func(values []float64) bool { max := float64(0) if len(values) > 0 { max = values[0] for _, v := range values[1:] { if v > max { max = v } } } return max == maxFloat64(values) }) if err != nil { t.Error(err) } } func TestMaxBE128(t *testing.T) { err := quick.Check(func(values [][16]byte) bool { max := [16]byte{} if len(values) > 0 { max = values[0] for _, v := range values[1:] { if bytes.Compare(v[:], max[:]) > 0 { max = v } } } ret := maxBE128(values) return (len(values) == 0 && ret == nil) || bytes.Equal(max[:], ret) }) if err != nil { t.Error(err) } } func TestMaxFixedLenByteArray(t *testing.T) { err := quick.Check(func(values []byte) bool { max := [1]byte{} if len(values) > 0 { max[0] = values[0] for _, v := range values[1:] { if v > max[0] { max[0] = v } } } ret := maxFixedLenByteArray(values, 1) return (len(values) == 0 && ret == nil) || bytes.Equal(max[:], ret) }) if err != nil { t.Error(err) } } func BenchmarkMaxInt32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, 
bufferSize int) { values := make([]int32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Int31() } for i := 0; i < b.N; i++ { maxInt32(values) } }) } func BenchmarkMaxInt64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]int64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Int63() } for i := 0; i < b.N; i++ { maxInt64(values) } }) } func BenchmarkMaxUint32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]uint32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Uint32() } for i := 0; i < b.N; i++ { maxUint32(values) } }) } func BenchmarkMaxUint64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]uint64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Uint64() } for i := 0; i < b.N; i++ { maxUint64(values) } }) } func BenchmarkMaxFloat32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]float32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Float32() } for i := 0; i < b.N; i++ { maxFloat32(values) } }) } func BenchmarkMaxFloat64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]float64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Float64() } for i := 0; i < b.N; i++ { maxFloat64(values) } }) } func BenchmarkMaxBE128(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([][16]byte, bufferSize) prng := rand.New(rand.NewSource(1)) for i := range values { prng.Read(values[i][:]) } for i := 0; i < b.N; i++ { maxBE128(values) } }) } func BenchmarkMaxFixedLenByteArray(b *testing.B) { forEachBenchmarkBufferSize(b, func(b 
// minFixedLenByteArray returns the lexicographically smallest fixed-size item
// in data, treating data as a contiguous sequence of items of the given size.
// It returns nil when data contains no item; a trailing partial item (fewer
// than size bytes) is ignored. The returned slice aliases data.
func minFixedLenByteArray(data []byte, size int) (min []byte) {
	if len(data) > 0 {
		min = data[:size]
		for off := size; off+size <= len(data); off += size {
			if candidate := data[off : off+size]; bytes.Compare(candidate, min) < 0 {
				min = candidate
			}
		}
	}
	return min
}
(AX)(SI*4), DX CMPL DX, BX CMOVLLT DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVL BX, ret+24(FP) RET // func minInt64(data []int64) int64 TEXT ·minInt64(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVQ (AX), BX CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTQ (AX), Z0 loop32: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMOVDQU64 128(AX)(SI*8), Z3 VMOVDQU64 192(AX)(SI*8), Z4 VPMINSQ Z1, Z2, Z5 VPMINSQ Z3, Z4, Z6 VPMINSQ Z5, Z6, Z1 VPMINSQ Z1, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMINSQ Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMINSQ X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMINSQ X1, X0, X0 VZEROUPPER MOVQ X0, BX CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX CMPQ DX, BX CMOVQLT DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVQ BX, ret+24(FP) RET // func minUint32(data []int32) int32 TEXT ·minUint32(SB), NOSPLIT, $-28 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVLQZX (AX), BX CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTD (AX), Z0 loop32: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VPMINUD Z1, Z0, Z0 VPMINUD Z2, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMINUD Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMINUD X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMINUD X1, X0, X0 VZEROUPPER MOVQ X0, DX MOVL DX, BX SHRQ $32, DX CMPL DX, BX CMOVLCS DX, BX CMPQ SI, CX JE done loop: MOVLQZX (AX)(SI*4), DX CMPL DX, BX CMOVLCS DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVL BX, ret+24(FP) RET // func minUint64(data []uint64) uint64 TEXT ·minUint64(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORQ SI, SI MOVQ (AX), BX 
CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTQ (AX), Z0 loop32: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMOVDQU64 128(AX)(SI*8), Z3 VMOVDQU64 192(AX)(SI*8), Z4 VPMINUQ Z1, Z2, Z5 VPMINUQ Z3, Z4, Z6 VPMINUQ Z5, Z6, Z1 VPMINUQ Z1, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VPMINUQ Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VPMINUQ X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VPMINUQ X1, X0, X0 VZEROUPPER MOVQ X0, BX CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX CMPQ DX, BX CMOVQCS DX, BX INCQ SI CMPQ SI, CX JNE loop done: MOVQ BX, ret+24(FP) RET // func minFloat32(data []float32) float32 TEXT ·minFloat32(SB), NOSPLIT, $-28 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORPS X0, X0 XORPS X1, X1 XORQ SI, SI MOVLQZX (AX), BX MOVQ BX, X0 CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $64 JB loop MOVQ CX, DI SHRQ $6, DI SHLQ $6, DI VPBROADCASTD (AX), Z0 loop64: VMOVDQU32 (AX)(SI*4), Z1 VMOVDQU32 64(AX)(SI*4), Z2 VMOVDQU32 128(AX)(SI*4), Z3 VMOVDQU32 192(AX)(SI*4), Z4 VMINPS Z1, Z2, Z5 VMINPS Z3, Z4, Z6 VMINPS Z5, Z6, Z1 VMINPS Z1, Z0, Z0 ADDQ $64, SI CMPQ SI, DI JNE loop64 VMOVDQU32 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VMINPS Y1, Y0, Y0 VMOVDQU32 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VMINPS X1, X0, X0 VMOVDQU32 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VMINPS X1, X0, X0 VZEROUPPER MOVAPS X0, X1 PSRLQ $32, X1 MOVQ X0, BX MOVQ X1, DX UCOMISS X0, X1 CMOVLCS DX, BX CMPQ SI, CX JE done MOVQ BX, X0 loop: MOVLQZX (AX)(SI*4), DX MOVQ DX, X1 UCOMISS X0, X1 CMOVLCS DX, BX MOVQ BX, X0 INCQ SI CMPQ SI, CX JNE loop done: MOVL BX, ret+24(FP) RET // func minFloat64(data []float64) float64 TEXT ·minFloat64(SB), NOSPLIT, $-32 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX XORQ BX, BX CMPQ CX, $0 JE done XORPD X0, X0 XORPD X1, X1 XORQ SI, SI MOVQ (AX), BX MOVQ BX, X0 CMPB ·hasAVX512VL(SB), $0 JE loop CMPQ CX, $32 JB 
loop MOVQ CX, DI SHRQ $5, DI SHLQ $5, DI VPBROADCASTQ (AX), Z0 loop32: VMOVDQU64 (AX)(SI*8), Z1 VMOVDQU64 64(AX)(SI*8), Z2 VMOVDQU64 128(AX)(SI*8), Z3 VMOVDQU64 192(AX)(SI*8), Z4 VMINPD Z1, Z2, Z5 VMINPD Z3, Z4, Z6 VMINPD Z5, Z6, Z1 VMINPD Z1, Z0, Z0 ADDQ $32, SI CMPQ SI, DI JNE loop32 VMOVDQU64 swap32+0(SB), Z1 VPERMI2D Z0, Z0, Z1 VMINPD Y1, Y0, Y0 VMOVDQU64 swap32+32(SB), Y1 VPERMI2D Y0, Y0, Y1 VMINPD X1, X0, X0 VMOVDQU64 swap32+48(SB), X1 VPERMI2D X0, X0, X1 VMINPD X1, X0, X0 VZEROUPPER MOVQ X0, BX CMPQ SI, CX JE done loop: MOVQ (AX)(SI*8), DX MOVQ DX, X1 UCOMISD X0, X1 CMOVQCS DX, BX MOVQ BX, X0 INCQ SI CMPQ SI, CX JNE loop done: MOVQ BX, ret+24(FP) RET // vpminu128 is a macro comparing unsigned 128 bits values held in the // `srcValues` and `minValues` vectors. The `srcIndexes` and `minIndexes` // vectors contain the indexes of elements in the value vectors. Remaining // K and R arguments are mask and general purpose registers needed to hold // temporary values during the computation. The last M argument is a mask // generated by vpminu128mask. // // The routine uses AVX-512 instructions (VPCMPUQ, VPBLENDMQ) to implement // the comparison of 128 bits values. The values are expected to be stored // in the vectors as a little-endian pair of two consecutive quad words. // // The results are written to the `minValues` and `minIndexes` vectors, // overwriting the inputs. `srcValues` and `srcIndexes` are read-only // parameters. // // At a high level, for two pairs of quad words forming two 128 bits values // A and B, the test implemented by this macro is: // // A[1] < B[1] || (A[1] == B[1] && A[0] < B[0]) // // Values in the source vector that evalute to true on this expression are // written to the vector of minimum values, and their indexes are written to // the vector of indexes. 
#define vpminu128(srcValues, srcIndexes, minValues, minIndexes, K1, K2, R1, R2, R3, M) \ VPCMPUQ $0, minValues, srcValues, K1 \ VPCMPUQ $1, minValues, srcValues, K2 \ KMOVB K1, R1 \ KMOVB K2, R2 \ MOVB R2, R3 \ SHLB $1, R3 \ ANDB R3, R1 \ ORB R2, R1 \ ANDB M, R1 \ MOVB R1, R2 \ SHRB $1, R2 \ ORB R2, R1 \ KMOVB R1, K1 \ VPBLENDMQ srcValues, minValues, K1, minValues \ VPBLENDMQ srcIndexes, minIndexes, K1, minIndexes // vpminu128mask is a macro used to initialize the mask passed as last argument // to vpminu128. The argument M is intended to be a general purpose register. // // The bit mask is used to merge the results of the "less than" and "equal" // comparison that are performed on each lane of minimum vectors. The upper bits // are used to compute results of the operation to determines which of the pairs // of quad words representing the 128 bits elements are the minimums. #define vpminu128mask(M) MOVB $0b10101010, M // func minBE128(data [][16]byte) []byte TEXT ·minBE128(SB), NOSPLIT, $-48 MOVQ data_base+0(FP), AX MOVQ data_len+8(FP), CX CMPQ CX, $0 JE null SHLQ $4, CX MOVQ CX, DX // len MOVQ AX, BX // min ADDQ AX, CX // end CMPQ DX, $256 JB loop CMPB ·hasAVX512MinMaxBE128(SB), $0 JE loop // Z19 holds a vector of the count by which we increment the vectors of // swap at each loop iteration. MOVQ $16, DI VPBROADCASTQ DI, Z19 // Z31 holds the shuffle mask used to convert 128 bits elements from big to // little endian so we can apply vectorized comparison instructions. VMOVDQU64 bswap128(SB), Z31 // These vectors hold four lanes of minimum values found in the input. VBROADCASTI64X2 (AX), Z0 VPSHUFB Z31, Z0, Z0 VMOVDQU64 Z0, Z5 VMOVDQU64 Z0, Z10 VMOVDQU64 Z0, Z15 // These vectors hold four lanes of swap of minimum values. // // We initialize them at zero because we broadcast the first value of the // input in the vectors that track the minimums of each lane; in other // words, we assume the minimum value is at the first offset and work our // way up from there. 
VPXORQ Z2, Z2, Z2 VPXORQ Z7, Z7, Z7 VPXORQ Z12, Z12, Z12 VPXORQ Z17, Z17, Z17 // These vectors are used to compute the swap of minimum values held // in [Z1, Z5, Z10, Z15]. Each vector holds a contiguous sequence of // swap; for example, Z3 is initialized with [0, 1, 2, 3]. At each // loop iteration, the swap are incremented by the number of elements // consumed from the input (4x4=16). VMOVDQU64 indexes128(SB), Z3 VPXORQ Z8, Z8, Z8 VPXORQ Z13, Z13, Z13 VPXORQ Z18, Z18, Z18 MOVQ $4, DI VPBROADCASTQ DI, Z1 VPADDQ Z1, Z3, Z8 VPADDQ Z1, Z8, Z13 VPADDQ Z1, Z13, Z18 vpminu128mask(DI) SHRQ $8, DX SHLQ $8, DX ADDQ AX, DX loop16: // Compute 4x4 minimum values in vector registers, along with their swap // in the input array. VMOVDQU64 (AX), Z1 VMOVDQU64 64(AX), Z6 VMOVDQU64 128(AX), Z11 VMOVDQU64 192(AX), Z16 VPSHUFB Z31, Z1, Z1 VPSHUFB Z31, Z6, Z6 VPSHUFB Z31, Z11, Z11 VPSHUFB Z31, Z16, Z16 vpminu128(Z1, Z3, Z0, Z2, K1, K2, R8, R9, R10, DI) vpminu128(Z6, Z8, Z5, Z7, K3, K4, R11, R12, R13, DI) vpminu128(Z11, Z13, Z10, Z12, K1, K2, R8, R9, R10, DI) vpminu128(Z16, Z18, Z15, Z17, K3, K4, R11, R12, R13, DI) VPADDQ Z19, Z3, Z3 VPADDQ Z19, Z8, Z8 VPADDQ Z19, Z13, Z13 VPADDQ Z19, Z18, Z18 ADDQ $256, AX CMPQ AX, DX JB loop16 // After the loop completed, we need to merge the lanes that each contain // 4 minimum values (so 16 total candidate at this stage). The results are // reduced into 4 candidates in Z0, with their swap in Z2. vpminu128(Z10, Z12, Z0, Z2, K1, K2, R8, R9, R10, DI) vpminu128(Z15, Z17, Z5, Z7, K3, K4, R11, R12, R13, DI) vpminu128(Z5, Z7, Z0, Z2, K1, K2, R8, R9, R10, DI) // Further reduce the results by swapping the upper and lower parts of the // vector registers, and comparing them to determine which values are the // smallest. We compare 2x2 values at this step, then 2x1 values at the next // to find the index of the minimum. 
VMOVDQU64 swap64+0(SB), Z1 VMOVDQU64 swap64+0(SB), Z3 VPERMI2Q Z0, Z0, Z1 VPERMI2Q Z2, Z2, Z3 vpminu128(Y1, Y3, Y0, Y2, K1, K2, R8, R9, R10, DI) VMOVDQU64 swap64+32(SB), Y1 VMOVDQU64 swap64+32(SB), Y3 VPERMI2Q Y0, Y0, Y1 VPERMI2Q Y2, Y2, Y3 vpminu128(X1, X3, X0, X2, K1, K2, R8, R9, R10, DI) VZEROUPPER // Extract the index of the minimum value computed in the lower 64 bits of // X2 and position the BX pointer at the index of the minimum value. MOVQ X2, DX SHLQ $4, DX ADDQ DX, BX CMPQ AX, CX JE done // Unless the input was aligned on 256 bytes, we need to perform a few more // iterations on the remaining elements. // // This loop is also taken if the CPU has no support for AVX-512. loop: MOVQ (AX), R8 MOVQ (BX), R9 BSWAPQ R8 BSWAPQ R9 CMPQ R8, R9 JB less JA next MOVQ 8(AX), R8 MOVQ 8(BX), R9 BSWAPQ R8 BSWAPQ R9 CMPQ R8, R9 JAE next less: MOVQ AX, BX next: ADDQ $16, AX CMPQ AX, CX JB loop done: MOVQ BX, ret_base+24(FP) MOVQ $16, ret_len+32(FP) MOVQ $16, ret_cap+40(FP) RET null: XORQ BX, BX MOVQ BX, ret_base+24(FP) MOVQ BX, ret_len+32(FP) MOVQ BX, ret_cap+40(FP) RET ================================================ FILE: page_min_purego.go ================================================ //go:build purego || !amd64 package parquet import "encoding/binary" // ----------------------------------------------------------------------------- // TODO: use generics versions of the these functions to reduce the amount of // code to maintain when we drop compatilibty with Go version older than 1.18. 
// -----------------------------------------------------------------------------

// minInt32 returns the smallest value in data, or zero when data is empty.
func minInt32(data []int32) (min int32) {
	if len(data) == 0 {
		return 0
	}
	min = data[0]
	for _, v := range data[1:] {
		if v < min {
			min = v
		}
	}
	return min
}

// minInt64 returns the smallest value in data, or zero when data is empty.
func minInt64(data []int64) (min int64) {
	if len(data) == 0 {
		return 0
	}
	min = data[0]
	for _, v := range data[1:] {
		if v < min {
			min = v
		}
	}
	return min
}

// minUint32 returns the smallest value in data, or zero when data is empty.
func minUint32(data []uint32) (min uint32) {
	if len(data) == 0 {
		return 0
	}
	min = data[0]
	for _, v := range data[1:] {
		if v < min {
			min = v
		}
	}
	return min
}

// minUint64 returns the smallest value in data, or zero when data is empty.
func minUint64(data []uint64) (min uint64) {
	if len(data) == 0 {
		return 0
	}
	min = data[0]
	for _, v := range data[1:] {
		if v < min {
			min = v
		}
	}
	return min
}

// minFloat32 returns the smallest value in data, or zero when data is empty.
// NaN values never compare smaller, so they are only returned when data[0]
// is NaN and no later value compares smaller.
func minFloat32(data []float32) (min float32) {
	if len(data) == 0 {
		return 0
	}
	min = data[0]
	for _, v := range data[1:] {
		if v < min {
			min = v
		}
	}
	return min
}

// minFloat64 returns the smallest value in data, or zero when data is empty.
// See minFloat32 for the treatment of NaN values.
func minFloat64(data []float64) (min float64) {
	if len(data) == 0 {
		return 0
	}
	min = data[0]
	for _, v := range data[1:] {
		if v < min {
			min = v
		}
	}
	return min
}

// minBE128 returns the smallest 16 byte big-endian value in data, or nil when
// data is empty. The returned slice aliases the winning element of data.
func minBE128(data [][16]byte) (min []byte) {
	if len(data) == 0 {
		return nil
	}
	best := 0
	hi := binary.BigEndian.Uint64(data[0][:8])
	for i := 1; i < len(data); i++ {
		x := binary.BigEndian.Uint64(data[i][:8])
		if x > hi {
			continue
		}
		// Either the upper word is strictly smaller, or it ties and the lower
		// word decides.
		if x < hi || binary.BigEndian.Uint64(data[i][8:]) < binary.BigEndian.Uint64(data[best][8:]) {
			hi, best = x, i
		}
	}
	return data[best][:]
}
= values[0] for _, v := range values[1:] { if v < min { min = v } } } return min == minInt64(values) }) if err != nil { t.Error(err) } } func TestMinUint32(t *testing.T) { err := quick.Check(func(values []uint32) bool { min := uint32(0) if len(values) > 0 { min = values[0] for _, v := range values[1:] { if v < min { min = v } } } return min == minUint32(values) }) if err != nil { t.Error(err) } } func TestMinUint64(t *testing.T) { err := quick.Check(func(values []uint64) bool { min := uint64(0) if len(values) > 0 { min = values[0] for _, v := range values[1:] { if v < min { min = v } } } return min == minUint64(values) }) if err != nil { t.Error(err) } } func TestMinFloat32(t *testing.T) { err := quick.Check(func(values []float32) bool { min := float32(0) if len(values) > 0 { min = values[0] for _, v := range values[1:] { if v < min { min = v } } } return min == minFloat32(values) }) if err != nil { t.Error(err) } } func TestMinFloat64(t *testing.T) { err := quick.Check(func(values []float64) bool { min := float64(0) if len(values) > 0 { min = values[0] for _, v := range values[1:] { if v < min { min = v } } } return min == minFloat64(values) }) if err != nil { t.Error(err) } } func TestMinBE128(t *testing.T) { err := quick.Check(func(values [][16]byte) bool { min := [16]byte{} if len(values) > 0 { min = values[0] for _, v := range values[1:] { if bytes.Compare(v[:], min[:]) < 0 { min = v } } } ret := minBE128(values) return (len(values) == 0 && ret == nil) || bytes.Equal(min[:], ret) }) if err != nil { t.Error(err) } } func TestMinFixedLenByteArray(t *testing.T) { err := quick.Check(func(values []byte) bool { min := [1]byte{} if len(values) > 0 { min[0] = values[0] for _, v := range values[1:] { if v < min[0] { min[0] = v } } } ret := minFixedLenByteArray(values, 1) return (len(values) == 0 && ret == nil) || bytes.Equal(min[:], ret) }) if err != nil { t.Error(err) } } func BenchmarkMinInt32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, 
bufferSize int) { values := make([]int32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Int31() } for i := 0; i < b.N; i++ { minInt32(values) } }) } func BenchmarkMinInt64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]int64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Int63() } for i := 0; i < b.N; i++ { minInt64(values) } }) } func BenchmarkMinUint32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]uint32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Uint32() } for i := 0; i < b.N; i++ { minUint32(values) } }) } func BenchmarkMinUint64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]uint64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Uint64() } for i := 0; i < b.N; i++ { minUint64(values) } }) } func BenchmarkMinFloat32(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]float32, bufferSize/4) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Float32() } for i := 0; i < b.N; i++ { minFloat32(values) } }) } func BenchmarkMinFloat64(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([]float64, bufferSize/8) prng := rand.New(rand.NewSource(1)) for i := range values { values[i] = prng.Float64() } for i := 0; i < b.N; i++ { minFloat64(values) } }) } func BenchmarkMinBE128(b *testing.B) { forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { values := make([][16]byte, bufferSize) prng := rand.New(rand.NewSource(1)) for i := range values { prng.Read(values[i][:]) } for i := 0; i < b.N; i++ { minBE128(values) } }) } func BenchmarkMinFixedLenByteArray(b *testing.B) { forEachBenchmarkBufferSize(b, func(b 
*testing.B, bufferSize int) { values := make([]byte, bufferSize) prng := rand.New(rand.NewSource(1)) prng.Read(values) for i := 0; i < b.N; i++ { minFixedLenByteArray(values, 32) } }) } ================================================ FILE: page_test.go ================================================ package parquet_test import ( "bytes" "io" "reflect" "testing" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding/plain" "github.com/segmentio/parquet-go/internal/unsafecast" ) func TestPage(t *testing.T) { t.Run("BOOLEAN", testPageBoolean) t.Run("INT32", testPageInt32) t.Run("INT64", testPageInt64) t.Run("INT96", testPageInt96) t.Run("FLOAT", testPageFloat) t.Run("DOUBLE", testPageDouble) t.Run("BYTE_ARRAY", testPageByteArray) t.Run("FIXED_LEN_BYTE_ARRAY", testPageFixedLenByteArray) } func testPageBoolean(t *testing.T) { schema := parquet.SchemaOf(struct{ Value bool }{}) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := make([]bool, 50_000) for i := range values { values[i] = i%2 == 0 } n, err := w.(parquet.BooleanWriter).WriteBooleans(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]bool, 50_000) n, err := r.(parquet.BooleanReader).ReadBooleans(values) return values[:n], err }, }) }) } func testPageInt32(t *testing.T) { schema := parquet.SchemaOf(struct{ Value int32 }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []int32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(io.Writer).Write(unsafecast.Int32ToBytes(values)) return values[:n/4], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]int32, 10) n, err := r.(io.Reader).Read(unsafecast.Int32ToBytes(values)) return values[:n/4], err }, }) }) t.Run("parquet", func(t *testing.T) { 
testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []int32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(parquet.Int32Writer).WriteInt32s(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]int32, 10) n, err := r.(parquet.Int32Reader).ReadInt32s(values) return values[:n], err }, }) }) } func testPageInt64(t *testing.T) { schema := parquet.SchemaOf(struct{ Value int64 }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []int64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(io.Writer).Write(unsafecast.Int64ToBytes(values)) return values[:n/8], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]int64, 10) n, err := r.(io.Reader).Read(unsafecast.Int64ToBytes(values)) return values[:n/8], err }, }) }) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []int64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(parquet.Int64Writer).WriteInt64s(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]int64, 10) n, err := r.(parquet.Int64Reader).ReadInt64s(values) return values[:n], err }, }) }) } func testPageInt96(t *testing.T) { schema := parquet.SchemaOf(struct{ Value deprecated.Int96 }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []deprecated.Int96{{0: 0}, {0: 1}, {0: 2}} n, err := w.(io.Writer).Write(deprecated.Int96ToBytes(values)) return values[:n/12], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]deprecated.Int96, 3) n, err := r.(io.Reader).Read(deprecated.Int96ToBytes(values)) return values[:n/12], err }, }) }) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w 
parquet.ValueWriter) (interface{}, error) { values := []deprecated.Int96{{0: 0}, {0: 1}, {0: 2}} n, err := w.(parquet.Int96Writer).WriteInt96s(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]deprecated.Int96, 3) n, err := r.(parquet.Int96Reader).ReadInt96s(values) return values[:n], err }, }) }) } func testPageFloat(t *testing.T) { schema := parquet.SchemaOf(struct{ Value float32 }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(io.Writer).Write(unsafecast.Float32ToBytes(values)) return values[:n/4], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]float32, 10) n, err := r.(io.Reader).Read(unsafecast.Float32ToBytes(values)) return values[:n/4], err }, }) }) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(parquet.FloatWriter).WriteFloats(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]float32, 10) n, err := r.(parquet.FloatReader).ReadFloats(values) return values[:n], err }, }) }) } func testPageDouble(t *testing.T) { schema := parquet.SchemaOf(struct{ Value float64 }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(io.Writer).Write(unsafecast.Float64ToBytes(values)) return values[:n/8], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]float64, 10) n, err := r.(io.Reader).Read(unsafecast.Float64ToBytes(values)) return values[:n/8], err }, }) }) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, 
error) { values := []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9} n, err := w.(parquet.DoubleWriter).WriteDoubles(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]float64, 10) n, err := r.(parquet.DoubleReader).ReadDoubles(values) return values[:n], err }, }) }) } func testPageByteArray(t *testing.T) { schema := parquet.SchemaOf(struct{ Value []byte }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []byte{} values = plain.AppendByteArray(values, []byte("A")) values = plain.AppendByteArray(values, []byte("B")) values = plain.AppendByteArray(values, []byte("C")) n, err := w.(io.Writer).Write(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]byte, 3+3*plain.ByteArrayLengthSize) n, err := r.(io.Reader).Read(values) return values[:n], err }, }) }) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []byte{} values = plain.AppendByteArray(values, []byte("A")) values = plain.AppendByteArray(values, []byte("B")) values = plain.AppendByteArray(values, []byte("C")) _, err := w.(parquet.ByteArrayWriter).WriteByteArrays(values) return values, err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]byte, 3+3*plain.ByteArrayLengthSize) n, err := r.(parquet.ByteArrayReader).ReadByteArrays(values) return values[:n+n*plain.ByteArrayLengthSize], err }, }) }) } func testPageFixedLenByteArray(t *testing.T) { schema := parquet.SchemaOf(struct{ Value [3]byte }{}) t.Run("io", func(t *testing.T) { testBufferPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []byte("123456789") n, err := w.(io.Writer).Write(values) return values[:n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]byte, 3*3) n, err := 
r.(io.Reader).Read(values) return values[:n], err }, }) }) t.Run("parquet", func(t *testing.T) { testPage(t, schema, pageTest{ write: func(w parquet.ValueWriter) (interface{}, error) { values := []byte("123456789") n, err := w.(parquet.FixedLenByteArrayWriter).WriteFixedLenByteArrays(values) return values[:3*n], err }, read: func(r parquet.ValueReader) (interface{}, error) { values := make([]byte, 3*3) n, err := r.(parquet.FixedLenByteArrayReader).ReadFixedLenByteArrays(values) return values[:3*n], err }, }) }) } type pageTest struct { write func(parquet.ValueWriter) (interface{}, error) read func(parquet.ValueReader) (interface{}, error) } func testPage(t *testing.T, schema *parquet.Schema, test pageTest) { t.Run("buffer", func(t *testing.T) { testBufferPage(t, schema, test) }) t.Run("file", func(t *testing.T) { testFilePage(t, schema, test) }) } func testBufferPage(t *testing.T, schema *parquet.Schema, test pageTest) { buffer := parquet.NewBuffer(schema) column := buffer.ColumnBuffers()[0] w, err := test.write(column) if err != nil { t.Fatal("writing page values:", err) } r, err := test.read(column.Page().Values()) if err != io.EOF { t.Errorf("expected io.EOF after reading all values but got %v", err) } if !reflect.DeepEqual(w, r) { t.Errorf("wrong values read from the page: got=%+v want=%+v", r, w) } } func testFilePage(t *testing.T, schema *parquet.Schema, test pageTest) { buffer := parquet.NewBuffer(schema) column := buffer.ColumnBuffers()[0] w, err := test.write(column) if err != nil { t.Fatal("writing page values:", err) } output := new(bytes.Buffer) writer := parquet.NewWriter(output) n, err := writer.WriteRowGroup(buffer) if err != nil { t.Fatal("writing parquet file:", err) } if err := writer.Close(); err != nil { t.Fatal("writing parquet file:", err) } if n != buffer.NumRows() { t.Fatalf("number of rows written mismatch: got=%d want=%d", n, buffer.NumRows()) } reader := bytes.NewReader(output.Bytes()) f, err := parquet.OpenFile(reader, reader.Size()) if 
err != nil { t.Fatal("opening parquet file:", err) } pages := f.RowGroups()[0].ColumnChunks()[0].Pages() defer pages.Close() p, err := pages.ReadPage() if err != nil { t.Fatal("reading parquet page:", err) } defer parquet.Release(p) values := p.Values() r, err := test.read(values) if err != io.EOF && err != nil { t.Errorf("expected io.EOF after reading all values but got %v", err) } if !reflect.DeepEqual(w, r) { t.Errorf("wrong values read from the page: got=%+v want=%+v", r, w) } if r, err := test.read(values); reflect.ValueOf(r).Len() != 0 || err != io.EOF { t.Errorf("expected no data and io.EOF after reading all values but got %d and %v", r, err) } } type testStruct struct { Value *string } func TestOptionalPageTrailingNulls(t *testing.T) { schema := parquet.SchemaOf(&testStruct{}) buffer := parquet.NewBuffer(schema) str := "test" rows := []testStruct{{ Value: nil, }, { Value: &str, }, { Value: nil, }} for _, row := range rows { _, err := buffer.WriteRows([]parquet.Row{schema.Deconstruct(nil, row)}) if err != nil { t.Fatal("writing row:", err) } } resultRows := make([]parquet.Row, 0, len(rows)) bufferRows := make([]parquet.Row, 10) reader := buffer.Rows() defer reader.Close() for { n, err := reader.ReadRows(bufferRows) resultRows = append(resultRows, bufferRows[:n]...) 
if err != nil { if err == io.EOF { break } t.Fatal("reading rows:", err) } } if len(resultRows) != len(rows) { t.Errorf("wrong number of rows read: got=%d want=%d", len(resultRows), len(rows)) } } func TestOptionalPagePreserveIndex(t *testing.T) { schema := parquet.SchemaOf(&testStruct{}) buffer := parquet.NewBuffer(schema) _, err := buffer.WriteRows([]parquet.Row{ schema.Deconstruct(nil, &testStruct{Value: nil}), }) if err != nil { t.Fatal("writing row:", err) } rows := buffer.Rows() defer rows.Close() rowbuf := make([]parquet.Row, 2) n, err := rows.ReadRows(rowbuf) if err != nil && err != io.EOF { t.Fatal("reading rows:", err) } if n != 1 { t.Fatal("wrong number of rows returned:", n) } if rowbuf[0][0].Column() != 0 { t.Errorf("wrong index: got=%d want=%d", rowbuf[0][0].Column(), 0) } n, err = rows.ReadRows(rowbuf) if err != io.EOF { t.Fatal("reading EOF:", err) } if n != 0 { t.Fatal("expected no more rows after EOF:", n) } } func TestRepeatedPageTrailingNulls(t *testing.T) { type testStruct struct { A []string `parquet:"a"` } s := parquet.SchemaOf(&testStruct{}) records := []*testStruct{ {A: nil}, {A: []string{"test"}}, {A: nil}, } buf := parquet.NewBuffer(s) for _, rec := range records { row := s.Deconstruct(nil, rec) _, err := buf.WriteRows([]parquet.Row{row}) if err != nil { t.Fatal(err) } } rows := make([]parquet.Row, len(records)+1) reader := buf.Rows() defer reader.Close() n, err := reader.ReadRows(rows) if err != nil && err != io.EOF { t.Fatal("reading rows:", err) } if n != len(records) { t.Errorf("wrong number of rows read: got=%d want=%d", n, len(records)) } } ================================================ FILE: page_values.go ================================================ package parquet import ( "io" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/encoding/plain" "github.com/segmentio/parquet-go/internal/unsafecast" ) type optionalPageValues struct { page *optionalPage values ValueReader offset int } func (r 
*optionalPageValues) ReadValues(values []Value) (n int, err error) {
	maxDefinitionLevel := r.page.maxDefinitionLevel
	definitionLevels := r.page.definitionLevels
	// Column indexes are stored in Value as their bitwise complement.
	columnIndex := ^int16(r.page.Column())
	for n < len(values) && r.offset < len(definitionLevels) {
		// Produce null values for every position whose definition level is
		// below the maximum; those positions have no backing entry in r.values.
		for n < len(values) && r.offset < len(definitionLevels) && definitionLevels[r.offset] != maxDefinitionLevel {
			values[n] = Value{
				definitionLevel: definitionLevels[r.offset],
				columnIndex:     columnIndex,
			}
			r.offset++
			n++
		}
		// Measure the run of non-null values so they can be read in one call.
		i := n
		j := r.offset
		for i < len(values) && j < len(definitionLevels) && definitionLevels[j] == maxDefinitionLevel {
			i++
			j++
		}
		if n < i {
			// j is reused here as the count of values actually read.
			for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- {
				values[n].definitionLevel = maxDefinitionLevel
				r.offset++
				n++
			}
			// Do not return on an io.EOF here as we may still have null values to read.
			if err != nil && err != io.EOF {
				return n, err
			}
			err = nil
		}
	}
	if r.offset == len(definitionLevels) {
		err = io.EOF
	}
	return n, err
}

// repeatedPageValues reads values from a repeated page, reconstructing the
// repetition and definition level of each value from the page's level slices.
type repeatedPageValues struct {
	page   *repeatedPage
	values ValueReader
	offset int
}

func (r *repeatedPageValues) ReadValues(values []Value) (n int, err error) {
	maxDefinitionLevel := r.page.maxDefinitionLevel
	definitionLevels := r.page.definitionLevels
	repetitionLevels := r.page.repetitionLevels
	// Column indexes are stored in Value as their bitwise complement.
	columnIndex := ^int16(r.page.Column())

	// While we haven't exceeded the output buffer and we haven't exceeded the page size.
	for n < len(values) && r.offset < len(definitionLevels) {

		// While we haven't exceeded the output buffer and we haven't exceeded the
		// page size AND the current element's definitionLevel is not the
		// maxDefinitionLevel (this is a null value), create the zero values to be
		// returned in this run.
		for n < len(values) && r.offset < len(definitionLevels) && definitionLevels[r.offset] != maxDefinitionLevel {
			values[n] = Value{
				repetitionLevel: repetitionLevels[r.offset],
				definitionLevel: definitionLevels[r.offset],
				columnIndex:     columnIndex,
			}
			r.offset++
			n++
		}

		i := n
		j := r.offset

		// Get the length of the run of non-zero values to be copied.
		for i < len(values) && j < len(definitionLevels) && definitionLevels[j] == maxDefinitionLevel {
			i++
			j++
		}

		// Copy all the non-zero values in this run.
		if n < i {
			// j is reused here as the count of values actually read.
			for j, err = r.values.ReadValues(values[n:i]); j > 0; j-- {
				values[n].repetitionLevel = repetitionLevels[r.offset]
				values[n].definitionLevel = maxDefinitionLevel
				r.offset++
				n++
			}
			// Do not return on io.EOF yet: there may still be null values to produce.
			if err != nil && err != io.EOF {
				return n, err
			}
			err = nil
		}
	}
	if r.offset == len(definitionLevels) {
		err = io.EOF
	}
	return n, err
}

// booleanPageValues reads boolean values from a boolean page.
type booleanPageValues struct {
	page   *booleanPage
	offset int
}

func (r *booleanPageValues) ReadBooleans(values []bool) (n int, err error) {
	for n < len(values) && r.offset < int(r.page.numValues) {
		values[n] = r.page.valueAt(r.offset)
		r.offset++
		n++
	}
	if r.offset == int(r.page.numValues) {
		err = io.EOF
	}
	return n, err
}

func (r *booleanPageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < int(r.page.numValues) {
		values[n] = r.page.makeValue(r.page.valueAt(r.offset))
		r.offset++
		n++
	}
	if r.offset == int(r.page.numValues) {
		err = io.EOF
	}
	return n, err
}

// int32PageValues reads int32 values from an int32 page.
type int32PageValues struct {
	page   *int32Page
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of int32 and the
// returned count is a number of bytes.
func (r *int32PageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadInt32s(unsafecast.BytesToInt32(b))
	return 4 * n, err
}

func (r *int32PageValues) ReadInt32s(values []int32) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *int32PageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// int64PageValues reads int64 values from an int64 page.
type int64PageValues struct {
	page   *int64Page
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of int64 and the
// returned count is a number of bytes.
func (r *int64PageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadInt64s(unsafecast.BytesToInt64(b))
	return 8 * n, err
}

func (r *int64PageValues) ReadInt64s(values []int64) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *int64PageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// int96PageValues reads deprecated 96 bits integer values from an int96 page.
type int96PageValues struct {
	page   *int96Page
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of Int96 and the
// returned count is a number of bytes.
func (r *int96PageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadInt96s(deprecated.BytesToInt96(b))
	return 12 * n, err
}

func (r *int96PageValues) ReadInt96s(values []deprecated.Int96) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *int96PageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// floatPageValues reads float32 values from a float page.
type floatPageValues struct {
	page   *floatPage
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of float32 and the
// returned count is a number of bytes.
func (r *floatPageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadFloats(unsafecast.BytesToFloat32(b))
	return 4 * n, err
}

func (r *floatPageValues) ReadFloats(values []float32) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *floatPageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// doublePageValues reads float64 values from a double page.
type doublePageValues struct {
	page   *doublePage
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of float64 and the
// returned count is a number of bytes.
func (r *doublePageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadDoubles(unsafecast.BytesToFloat64(b))
	return 8 * n, err
}

func (r *doublePageValues) ReadDoubles(values []float64) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *doublePageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// byteArrayPageValues reads variable-length byte array values from a byte
// array page, emitting them in the PLAIN layout (length prefix followed by
// the value bytes).
type byteArrayPageValues struct {
	page   *byteArrayPage
	offset int
}

// Read implements io.Reader; the returned count is a number of bytes.
func (r *byteArrayPageValues) Read(b []byte) (int, error) {
	_, n, err := r.readByteArrays(b)
	return n, err
}

func (r *byteArrayPageValues) ReadRequired(values []byte) (int, error) {
	return r.ReadByteArrays(values)
}

// ReadByteArrays returns the number of byte array values written to values.
func (r *byteArrayPageValues) ReadByteArrays(values []byte) (int, error) {
	n, _, err := r.readByteArrays(values)
	return n, err
}

// readByteArrays returns both the number of values (c) and the number of
// bytes (n) written to values.
func (r *byteArrayPageValues) readByteArrays(values []byte) (c, n int, err error) {
	numValues := r.page.len()
	for r.offset < numValues {
		b := r.page.index(r.offset)
		k := plain.ByteArrayLengthSize + len(b)
		// Stop before writing a partial value.
		if k > (len(values) - n) {
			break
		}
		plain.PutByteArrayLength(values[n:], len(b))
		n += plain.ByteArrayLengthSize
		n += copy(values[n:], b)
		r.offset++
		c++
	}
	if r.offset == numValues {
		err = io.EOF
	} else if n == 0 && len(values) > 0 {
		// The buffer was too small to hold even a single value.
		err = io.ErrShortBuffer
	}
	return c, n, err
}

func (r *byteArrayPageValues) ReadValues(values []Value) (n int, err error) {
	numValues := r.page.len()
	for n < len(values) && r.offset < numValues {
		values[n] = r.page.makeValueBytes(r.page.index(r.offset))
		r.offset++
		n++
	}
	if r.offset == numValues {
		err = io.EOF
	}
	return n, err
}

// fixedLenByteArrayPageValues reads fixed-length byte array values from a
// fixed length byte array page.
type fixedLenByteArrayPageValues struct {
	page   *fixedLenByteArrayPage
	offset int
}

func
(r *fixedLenByteArrayPageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadFixedLenByteArrays(b)
	// Convert the count of values back into a count of bytes for io.Reader.
	return n * r.page.size, err
}

func (r *fixedLenByteArrayPageValues) ReadRequired(values []byte) (int, error) {
	return r.ReadFixedLenByteArrays(values)
}

// ReadFixedLenByteArrays returns the number of whole values copied to values.
func (r *fixedLenByteArrayPageValues) ReadFixedLenByteArrays(values []byte) (n int, err error) {
	n = copy(values, r.page.data[r.offset:]) / r.page.size
	r.offset += n * r.page.size
	if r.offset == len(r.page.data) {
		err = io.EOF
	} else if n == 0 && len(values) > 0 {
		// The buffer was too small to hold even a single value.
		err = io.ErrShortBuffer
	}
	return n, err
}

func (r *fixedLenByteArrayPageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.data) {
		values[n] = r.page.makeValueBytes(r.page.data[r.offset : r.offset+r.page.size])
		r.offset += r.page.size
		n++
	}
	if r.offset == len(r.page.data) {
		err = io.EOF
	}
	return n, err
}

// uint32PageValues reads uint32 values from a uint32 page.
type uint32PageValues struct {
	page   *uint32Page
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of uint32 and the
// returned count is a number of bytes.
func (r *uint32PageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadUint32s(unsafecast.BytesToUint32(b))
	return 4 * n, err
}

func (r *uint32PageValues) ReadUint32s(values []uint32) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *uint32PageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// uint64PageValues reads uint64 values from a uint64 page.
type uint64PageValues struct {
	page   *uint64Page
	offset int
}

// Read implements io.Reader; b is reinterpreted as a slice of uint64 and the
// returned count is a number of bytes.
func (r *uint64PageValues) Read(b []byte) (n int, err error) {
	n, err = r.ReadUint64s(unsafecast.BytesToUint64(b))
	return 8 * n, err
}

func (r *uint64PageValues) ReadUint64s(values []uint64) (n int, err error) {
	n = copy(values, r.page.values[r.offset:])
	r.offset += n
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

func (r *uint64PageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// be128PageValues reads 128 bits big-endian values from a be128 page.
type be128PageValues struct {
	page   *be128Page
	offset int
}

func (r *be128PageValues) ReadValues(values []Value) (n int, err error) {
	for n < len(values) && r.offset < len(r.page.values) {
		values[n] = r.page.makeValue(&r.page.values[r.offset])
		r.offset++
		n++
	}
	if r.offset == len(r.page.values) {
		err = io.EOF
	}
	return n, err
}

// nullPageValues produces null values for a page that holds only nulls.
type nullPageValues struct {
	column int
	remain int
}

func (r *nullPageValues) ReadValues(values []Value) (n int, err error) {
	// Column indexes are stored in Value as their bitwise complement.
	columnIndex := ^int16(r.column)
	values = values[:min(r.remain, len(values))]
	for i := range values {
		values[i] = Value{columnIndex: columnIndex}
	}
	r.remain -= len(values)
	if r.remain == 0 {
		err = io.EOF
	}
	return len(values), err
}

================================================
FILE: parquet.go
================================================
// Copyright 2022 Twilio Inc.

// Package parquet is a library for working with parquet files.
// For an overview
// of Parquet's qualities as a storage format, see this blog post:
// https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet
//
// Or see the Parquet documentation: https://parquet.apache.org/docs/
package parquet

import "reflect"

// atLeastOne returns size, or 1 when size is smaller.
func atLeastOne(size int) int { return atLeast(size, 1) }

// atLeast returns size clamped to a minimum of least.
func atLeast(size, least int) int {
	if size < least {
		return least
	}
	return size
}

func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}

// typeNameOf returns a human-readable name for t, including the underlying
// kind when it differs from the type's string representation.
func typeNameOf(t reflect.Type) string {
	s1 := t.String()
	s2 := t.Kind().String()
	if s1 == s2 {
		return s1
	}
	return s1 + " (" + s2 + ")"
}

// isZero reports whether every byte of b is zero.
func isZero(b []byte) bool {
	for _, c := range b {
		if c != 0 {
			return false
		}
	}
	return true
}

================================================
FILE: parquet_amd64.go
================================================
//go:build !purego

package parquet

import "golang.org/x/sys/cpu"

var (
	// This variable is used in x86 assembly source files to gate the use of
	// AVX2 instructions depending on whether the CPU supports it.
	hasAVX2 = cpu.X86.HasAVX2

	hasAVX512F  = cpu.X86.HasAVX512F
	hasAVX512VL = cpu.X86.HasAVX512F && cpu.X86.HasAVX512VL

	// For min/max functions over big-endian 128 bits values, we need the
	// following instructions from the DQ set:
	// * VPBROADCASTQ (with 64 bits source register)
	// * VBROADCASTI64X2
	hasAVX512MinMaxBE128 = cpu.X86.HasAVX512F && cpu.X86.HasAVX512DQ
)

================================================
FILE: parquet_go18.go
================================================
//go:build go1.18

package parquet

import (
	"io"
	"os"
)

// Read reads and returns rows from the parquet file in the given reader.
//
// The type T defines the type of rows read from r. T must be compatible with
// the file's schema or an error will be returned. The row type might represent
// a subset of the full schema, in which case only a subset of the columns will
// be loaded from r.
// // This function is provided for convenience to facilitate reading of parquet // files from arbitrary locations in cases where the data set fit in memory. func Read[T any](r io.ReaderAt, size int64, options ...ReaderOption) (rows []T, err error) { config, err := NewReaderConfig(options...) if err != nil { return nil, err } file, err := OpenFile(r, size) if err != nil { return nil, err } rows = make([]T, file.NumRows()) reader := NewGenericReader[T](file, config) n, err := reader.Read(rows) if err == io.EOF { err = nil } reader.Close() return rows[:n], err } // ReadFile reads rows of the parquet file at the given path. // // The type T defines the type of rows read from r. T must be compatible with // the file's schema or an error will be returned. The row type might represent // a subset of the full schema, in which case only a subset of the columns will // be loaded from the file. // // This function is provided for convenience to facilitate reading of parquet // files from the file system in cases where the data set fit in memory. func ReadFile[T any](path string, options ...ReaderOption) (rows []T, err error) { f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() s, err := f.Stat() if err != nil { return nil, err } return Read[T](f, s.Size()) } // Write writes the given list of rows to a parquet file written to w. // // This function is provided for convenience to facilitate the creation of // parquet files. func Write[T any](w io.Writer, rows []T, options ...WriterOption) error { config, err := NewWriterConfig(options...) if err != nil { return err } writer := NewGenericWriter[T](w, config) if _, err := writer.Write(rows); err != nil { return err } return writer.Close() } // Write writes the given list of rows to a parquet file written to w. // // This function is provided for convenience to facilitate writing parquet // files to the file system. 
func WriteFile[T any](path string, rows []T, options ...WriterOption) error { f, err := os.Create(path) if err != nil { return err } defer f.Close() return Write(f, rows, options...) } ================================================ FILE: parquet_go18_test.go ================================================ //go:build go1.18 package parquet_test import ( "bytes" "fmt" "io" "log" "os" "reflect" "testing" "github.com/segmentio/parquet-go" "google.golang.org/protobuf/types/known/structpb" ) func ExampleReadFile() { type Row struct { ID int64 `parquet:"id"` Name string `parquet:"name,zstd"` } ExampleWriteFile() rows, err := parquet.ReadFile[Row]("/tmp/file.parquet") if err != nil { log.Fatal(err) } for _, row := range rows { fmt.Printf("%d: %q\n", row.ID, row.Name) } // Output: // 0: "Bob" // 1: "Alice" // 2: "Franky" } func ExampleWriteFile() { type Row struct { ID int64 `parquet:"id"` Name string `parquet:"name,zstd"` } if err := parquet.WriteFile("/tmp/file.parquet", []Row{ {ID: 0, Name: "Bob"}, {ID: 1, Name: "Alice"}, {ID: 2, Name: "Franky"}, }); err != nil { log.Fatal(err) } // Output: } func ExampleRead_any() { type Row struct{ FirstName, LastName string } buf := new(bytes.Buffer) err := parquet.Write(buf, []Row{ {FirstName: "Luke", LastName: "Skywalker"}, {FirstName: "Han", LastName: "Solo"}, {FirstName: "R2", LastName: "D2"}, }) if err != nil { log.Fatal(err) } file := bytes.NewReader(buf.Bytes()) rows, err := parquet.Read[any](file, file.Size()) if err != nil { log.Fatal(err) } for _, row := range rows { fmt.Printf("%q\n", row) } // Output: // map["FirstName":"Luke" "LastName":"Skywalker"] // map["FirstName":"Han" "LastName":"Solo"] // map["FirstName":"R2" "LastName":"D2"] } func ExampleWrite_any() { schema := parquet.SchemaOf(struct { FirstName string LastName string }{}) buf := new(bytes.Buffer) err := parquet.Write[any]( buf, []any{ map[string]string{"FirstName": "Luke", "LastName": "Skywalker"}, map[string]string{"FirstName": "Han", "LastName": "Solo"}, 
map[string]string{"FirstName": "R2", "LastName": "D2"}, }, schema, ) if err != nil { log.Fatal(err) } file := bytes.NewReader(buf.Bytes()) rows, err := parquet.Read[any](file, file.Size()) if err != nil { log.Fatal(err) } for _, row := range rows { fmt.Printf("%q\n", row) } // Output: // map["FirstName":"Luke" "LastName":"Skywalker"] // map["FirstName":"Han" "LastName":"Solo"] // map["FirstName":"R2" "LastName":"D2"] } func ExampleSearch() { type Row struct{ FirstName, LastName string } buf := new(bytes.Buffer) // The column being searched should be sorted to avoid a full scan of the // column. See the section of the readme on sorting for how to sort on // insertion into the parquet file using parquet.SortingColumns rows := []Row{ {FirstName: "C", LastName: "3PO"}, {FirstName: "Han", LastName: "Solo"}, {FirstName: "Leia", LastName: "Organa"}, {FirstName: "Luke", LastName: "Skywalker"}, {FirstName: "R2", LastName: "D2"}, } // The tiny page buffer size ensures we get multiple pages out of the example above. w := parquet.NewGenericWriter[Row](buf, parquet.PageBufferSize(12), parquet.WriteBufferSize(0)) // Need to write 1 row at a time here as writing many at once disregards PageBufferSize option. 
for _, row := range rows { _, err := w.Write([]Row{row}) if err != nil { log.Fatal(err) } } err := w.Close() if err != nil { log.Fatal(err) } reader := bytes.NewReader(buf.Bytes()) file, err := parquet.OpenFile(reader, reader.Size()) if err != nil { log.Fatal(err) } // Search is scoped to a single RowGroup/ColumnChunk rowGroup := file.RowGroups()[0] firstNameColChunk := rowGroup.ColumnChunks()[0] found := parquet.Search(firstNameColChunk.ColumnIndex(), parquet.ValueOf("Luke"), parquet.ByteArrayType) offsetIndex := firstNameColChunk.OffsetIndex() fmt.Printf("numPages: %d\n", offsetIndex.NumPages()) fmt.Printf("result found in page: %d\n", found) if found < offsetIndex.NumPages() { r := parquet.NewGenericReader[Row](file) defer r.Close() // Seek to the first row in the page the result was found r.SeekToRow(offsetIndex.FirstRowIndex(found)) result := make([]Row, 2) _, _ = r.Read(result) // Leia is in index 0 for the page. for _, row := range result { if row.FirstName == "Luke" { fmt.Printf("%q\n", row) } } } // Output: // numPages: 3 // result found in page: 1 // {"Luke" "Skywalker"} } func TestIssue360(t *testing.T) { type TestType struct { Key []int } schema := parquet.SchemaOf(TestType{}) buffer := parquet.NewGenericBuffer[any](schema) data := make([]any, 1) data[0] = TestType{Key: []int{1}} _, err := buffer.Write(data) if err != nil { fmt.Println("Exiting with error: ", err) return } var out bytes.Buffer writer := parquet.NewGenericWriter[any](&out, schema) _, err = parquet.CopyRows(writer, buffer.Rows()) if err != nil { fmt.Println("Exiting with error: ", err) return } writer.Close() br := bytes.NewReader(out.Bytes()) rows, _ := parquet.Read[any](br, br.Size()) expect := []any{ map[string]any{ "Key": []any{ int64(1), }, }, } assertRowsEqual(t, expect, rows) } func TestIssue362ParquetReadFromGenericReaders(t *testing.T) { path := "testdata/dms_test_table_LOAD00000001.parquet" fp, err := os.Open(path) if err != nil { t.Fatal(err) } defer fp.Close() r1 := 
parquet.NewGenericReader[any](fp) rows1 := make([]any, r1.NumRows()) _, err = r1.Read(rows1) if err != nil && err != io.EOF { t.Fatal(err) } r2 := parquet.NewGenericReader[any](fp) rows2 := make([]any, r2.NumRows()) _, err = r2.Read(rows2) if err != nil && err != io.EOF { t.Fatal(err) } } func TestIssue362ParquetReadFile(t *testing.T) { rows1, err := parquet.ReadFile[any]("testdata/dms_test_table_LOAD00000001.parquet") if err != nil { t.Fatal(err) } rows2, err := parquet.ReadFile[any]("testdata/dms_test_table_LOAD00000001.parquet") if err != nil { t.Fatal(err) } assertRowsEqual(t, rows1, rows2) } func TestIssue368(t *testing.T) { f, err := os.Open("testdata/issue368.parquet") if err != nil { t.Fatal(err) } defer f.Close() info, err := f.Stat() if err != nil { t.Fatal(err) } pf, err := parquet.OpenFile(f, info.Size()) if err != nil { t.Fatal(err) } reader := parquet.NewGenericReader[any](pf) defer reader.Close() trs := make([]any, 1) for { _, err := reader.Read(trs) if err != nil { break } } } func TestIssue377(t *testing.T) { type People struct { Name string Age int } type Nested struct { P []People F string GF string } row1 := Nested{P: []People{ { Name: "Bob", Age: 10, }}} ods := []Nested{ row1, } buf := new(bytes.Buffer) w := parquet.NewGenericWriter[Nested](buf) _, err := w.Write(ods) if err != nil { t.Fatal("write error: ", err) } w.Close() file := bytes.NewReader(buf.Bytes()) rows, err := parquet.Read[Nested](file, file.Size()) if err != nil { t.Fatal("read error: ", err) } assertRowsEqual(t, rows, ods) } func TestIssue423(t *testing.T) { type Inner struct { Value string `parquet:","` } type Outer struct { Label string `parquet:","` Inner Inner `parquet:",json"` Slice []Inner `parquet:",json"` // This is the only tricky situation. Because we're delegating to json Marshaler/Unmarshaler // We use the json tags for optionality. 
Ptr *Inner `json:",omitempty" parquet:",json"` // This tests BC behavior that slices of bytes and json strings still get written/read in a BC way. String string `parquet:",json"` Bytes []byte `parquet:",json"` MapOfStructPb map[string]*structpb.Value `parquet:",json"` StructPB *structpb.Value `parquet:",json"` } writeRows := []Outer{ { Label: "welp", Inner: Inner{ Value: "this is a string", }, Slice: []Inner{ { Value: "in a slice", }, }, Ptr: nil, String: `{"hello":"world"}`, Bytes: []byte(`{"goodbye":"world"}`), MapOfStructPb: map[string]*structpb.Value{ "answer": structpb.NewNumberValue(42.00), }, StructPB: structpb.NewBoolValue(true), }, { Label: "foxes", Inner: Inner{ Value: "the quick brown fox jumped over the yellow lazy dog.", }, Slice: []Inner{ { Value: "in a slice", }, }, Ptr: &Inner{ Value: "not nil", }, String: `{"hello":"world"}`, Bytes: []byte(`{"goodbye":"world"}`), MapOfStructPb: map[string]*structpb.Value{ "doubleAnswer": structpb.NewNumberValue(84.00), }, StructPB: structpb.NewBoolValue(false), }, } schema := parquet.SchemaOf(new(Outer)) fmt.Println(schema.String()) buf := new(bytes.Buffer) w := parquet.NewGenericWriter[Outer](buf, schema) _, err := w.Write(writeRows) if err != nil { t.Fatal("write error: ", err) } w.Close() file := bytes.NewReader(buf.Bytes()) readRows, err := parquet.Read[Outer](file, file.Size()) if err != nil { t.Fatal("read error: ", err) } assertRowsEqual(t, writeRows, readRows) } func TestReadFileGenericMultipleRowGroupsMultiplePages(t *testing.T) { type MyRow struct { ID [16]byte `parquet:"id,delta,uuid"` File string `parquet:"file,dict,zstd"` Index int64 `parquet:"index,delta,zstd"` } numRows := 20_000 maxPageBytes := 5000 tmp, err := os.CreateTemp("/tmp", "*.parquet") if err != nil { t.Fatal("os.CreateTemp: ", err) } path := tmp.Name() defer os.Remove(path) t.Log("file:", path) // The page buffer size ensures we get multiple pages out of this example. 
w := parquet.NewGenericWriter[MyRow](tmp, parquet.PageBufferSize(maxPageBytes)) // Need to write 1 row at a time here as writing many at once disregards PageBufferSize option. for i := 0; i < numRows; i++ { row := MyRow{ ID: [16]byte{15: byte(i)}, File: "hi" + fmt.Sprint(i), Index: int64(i), } _, err := w.Write([]MyRow{row}) if err != nil { t.Fatal("w.Write: ", err) } // Flush writes rows as row group. 4 total (20k/5k) in this file. if (i+1)%maxPageBytes == 0 { err = w.Flush() if err != nil { t.Fatal("w.Flush: ", err) } } } err = w.Close() if err != nil { t.Fatal("w.Close: ", err) } err = tmp.Close() if err != nil { t.Fatal("tmp.Close: ", err) } rows, err := parquet.ReadFile[MyRow](path) if err != nil { t.Fatal("parquet.ReadFile: ", err) } if len(rows) != numRows { t.Fatalf("not enough values were read: want=%d got=%d", len(rows), numRows) } for i, row := range rows { id := [16]byte{15: byte(i)} file := "hi" + fmt.Sprint(i) index := int64(i) if row.ID != id || row.File != file || row.Index != index { t.Fatalf("rows mismatch at index: %d got: %+v", i, row) } } } func assertRowsEqual[T any](t *testing.T, rows1, rows2 []T) { if !reflect.DeepEqual(rows1, rows2) { t.Error("rows mismatch") t.Log("want:") logRows(t, rows1) t.Log("got:") logRows(t, rows2) } } func logRows[T any](t *testing.T, rows []T) { for _, row := range rows { t.Logf(". 
%#v\n", row) } } ================================================ FILE: parquet_test.go ================================================ package parquet_test import ( "bytes" "fmt" "io" "math/rand" "reflect" "strings" "testing" "time" "github.com/google/uuid" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/internal/quick" ) const ( benchmarkNumRows = 10_000 benchmarkRowsPerStep = 1000 ) type benchmarkRowType struct { ID [16]byte `parquet:"id,uuid"` Value float64 `parquet:"value"` } func (row benchmarkRowType) generate(prng *rand.Rand) benchmarkRowType { prng.Read(row.ID[:]) row.Value = prng.Float64() return row } type paddedBooleanColumn struct { Value bool _ [3]byte } func (row paddedBooleanColumn) generate(prng *rand.Rand) paddedBooleanColumn { return paddedBooleanColumn{Value: prng.Int()%2 == 0} } type booleanColumn struct { Value bool } func (row booleanColumn) generate(prng *rand.Rand) booleanColumn { return booleanColumn{Value: prng.Int()%2 == 0} } type int32Column struct { Value int32 `parquet:",delta"` } func (row int32Column) generate(prng *rand.Rand) int32Column { return int32Column{Value: prng.Int31n(100)} } type int64Column struct { Value int64 `parquet:",delta"` } func (row int64Column) generate(prng *rand.Rand) int64Column { return int64Column{Value: prng.Int63n(100)} } type int96Column struct { Value deprecated.Int96 } func (row int96Column) generate(prng *rand.Rand) int96Column { row.Value[0] = prng.Uint32() row.Value[1] = prng.Uint32() row.Value[2] = prng.Uint32() return row } type floatColumn struct { Value float32 } func (row floatColumn) generate(prng *rand.Rand) floatColumn { return floatColumn{Value: prng.Float32()} } type doubleColumn struct { Value float64 } func (row doubleColumn) generate(prng *rand.Rand) doubleColumn { return doubleColumn{Value: prng.Float64()} } type byteArrayColumn struct { Value []byte } func (row byteArrayColumn) generate(prng *rand.Rand) byteArrayColumn 
{ row.Value = make([]byte, prng.Intn(10)) prng.Read(row.Value) return row } type fixedLenByteArrayColumn struct { Value [10]byte } func (row fixedLenByteArrayColumn) generate(prng *rand.Rand) fixedLenByteArrayColumn { prng.Read(row.Value[:]) return row } type stringColumn struct { Value string } func (row stringColumn) generate(prng *rand.Rand) stringColumn { return stringColumn{Value: generateString(prng, 10)} } type indexedStringColumn struct { Value string `parquet:",dict"` } func (row indexedStringColumn) generate(prng *rand.Rand) indexedStringColumn { return indexedStringColumn{Value: generateString(prng, 10)} } type uuidColumn struct { Value uuid.UUID `parquet:",delta"` } func (row uuidColumn) generate(prng *rand.Rand) uuidColumn { prng.Read(row.Value[:]) return row } type timeColumn struct { Value time.Time } func (row timeColumn) generate(prng *rand.Rand) timeColumn { t := time.Unix(0, prng.Int63()).UTC() return timeColumn{Value: t} } type timeInMillisColumn struct { Value time.Time `parquet:",timestamp(millisecond)"` } func (row timeInMillisColumn) generate(prng *rand.Rand) timeInMillisColumn { t := time.Unix(0, prng.Int63()).UTC() return timeInMillisColumn{Value: t} } type decimalColumn struct { Value int64 `parquet:",decimal(0:3)"` } func (row decimalColumn) generate(prng *rand.Rand) decimalColumn { return decimalColumn{Value: prng.Int63()} } type mapColumn struct { Value map[utf8string]int } func (row mapColumn) generate(prng *rand.Rand) mapColumn { n := prng.Intn(10) row.Value = make(map[utf8string]int, n) for i := 0; i < n; i++ { row.Value[utf8string(generateString(prng, 8))] = prng.Intn(100) } return row } type addressBook struct { Owner utf8string `parquet:",plain"` OwnerPhoneNumbers []utf8string `parquet:",plain"` Contacts []contact } type contact struct { Name utf8string `parquet:",plain"` PhoneNumber utf8string `parquet:",plain"` } func (row contact) generate(prng *rand.Rand) contact { return contact{ Name: utf8string(generateString(prng, 16)), 
PhoneNumber: utf8string(generateString(prng, 10)), } } type optionalInt32Column struct { Value int32 `parquet:",optional"` } func (row optionalInt32Column) generate(prng *rand.Rand) optionalInt32Column { return optionalInt32Column{Value: prng.Int31n(100)} } type repeatedInt32Column struct { Values []int32 } func (row repeatedInt32Column) generate(prng *rand.Rand) repeatedInt32Column { row.Values = make([]int32, prng.Intn(10)) for i := range row.Values { row.Values[i] = prng.Int31n(10) } return row } type listColumn2 struct { Value utf8string `parquet:",optional"` } type listColumn1 struct { List2 []listColumn2 `parquet:",list"` } type listColumn0 struct { List1 []listColumn1 `parquet:",list"` } type nestedListColumn1 struct { Level3 []utf8string `parquet:"level3"` } type nestedListColumn struct { Level1 []nestedListColumn1 `parquet:"level1"` Level2 []utf8string `parquet:"level2"` } type utf8string string func (utf8string) Generate(rand *rand.Rand, size int) reflect.Value { const characters = "abcdefghijklmnopqrstuvwxyz1234567890" const maxSize = 10 if size > maxSize { size = maxSize } n := rand.Intn(size) b := make([]byte, n) for i := range b { b[i] = characters[rand.Intn(len(characters))] } return reflect.ValueOf(utf8string(b)) } type Contact struct { Name string `parquet:"name"` PhoneNumber string `parquet:"phoneNumber,optional,zstd"` } type AddressBook struct { Owner string `parquet:"owner,zstd"` OwnerPhoneNumbers []string `parquet:"ownerPhoneNumbers,gzip"` Contacts []Contact `parquet:"contacts"` } func forEachLeafColumn(col *parquet.Column, do func(*parquet.Column) error) error { children := col.Columns() if len(children) == 0 { return do(col) } for _, child := range children { if err := forEachLeafColumn(child, do); err != nil { return err } } return nil } func forEachPage(pages parquet.PageReader, do func(parquet.Page) error) error { doAndReleasePage := func(page parquet.Page) error { defer parquet.Release(page) return do(page) } for { p, err := 
pages.ReadPage()
		if err != nil {
			// PageReader signals the end of the page stream with io.EOF;
			// translate it to nil so callers observe a clean termination.
			if err == io.EOF {
				err = nil
			}
			return err
		}
		if err := doAndReleasePage(p); err != nil {
			return err
		}
	}
}

// forEachValue invokes do for every value produced by the given ValueReader.
// Values are read through a deliberately tiny buffer (3 entries) so the loop
// exercises multiple ReadValues calls even for small pages.
func forEachValue(values parquet.ValueReader, do func(parquet.Value) error) error {
	buffer := [3]parquet.Value{}
	for {
		n, err := values.ReadValues(buffer[:])
		// Values read before an error (including io.EOF) must still be
		// delivered to the callback.
		for _, v := range buffer[:n] {
			if err := do(v); err != nil {
				return err
			}
		}
		if err != nil {
			if err == io.EOF {
				err = nil
			}
			return err
		}
	}
}

// forEachColumnPage applies do to every page of every leaf column under col.
func forEachColumnPage(col *parquet.Column, do func(*parquet.Column, parquet.Page) error) error {
	return forEachLeafColumn(col, func(leaf *parquet.Column) error {
		pages := leaf.Pages()
		defer pages.Close()
		return forEachPage(pages, func(page parquet.Page) error { return do(leaf, page) })
	})
}

// forEachColumnValue applies do to every value of every leaf column under col.
func forEachColumnValue(col *parquet.Column, do func(*parquet.Column, parquet.Value) error) error {
	return forEachColumnPage(col, func(leaf *parquet.Column, page parquet.Page) error {
		return forEachValue(page.Values(), func(value parquet.Value) error { return do(leaf, value) })
	})
}

// forEachColumnChunk applies do to the column chunk of every leaf column in
// every row group of the file.
func forEachColumnChunk(file *parquet.File, do func(*parquet.Column, parquet.ColumnChunk) error) error {
	return forEachLeafColumn(file.Root(), func(leaf *parquet.Column) error {
		for _, rowGroup := range file.RowGroups() {
			if err := do(leaf, rowGroup.ColumnChunks()[leaf.Index()]); err != nil {
				return err
			}
		}
		return nil
	})
}

// createParquetFile writes rows to an in-memory buffer and reopens the result
// as a parquet.File, which is convenient for tests that need read access to
// the file metadata and pages.
func createParquetFile(rows rows, options ...parquet.WriterOption) (*parquet.File, error) {
	buffer := new(bytes.Buffer)
	if err := writeParquetFile(buffer, rows, options...); err != nil {
		return nil, err
	}
	reader := bytes.NewReader(buffer.Bytes())
	return parquet.OpenFile(reader, reader.Size())
}

// writeParquetFile writes each row to w through a parquet.Writer, one call to
// Write per row, then closes the writer to flush the footer.
func writeParquetFile(w io.Writer, rows rows, options ...parquet.WriterOption) error {
	writer := parquet.NewWriter(w, options...)
for _, row := range rows {
		if err := writer.Write(row); err != nil {
			return err
		}
	}
	// Close flushes any buffered pages and writes the parquet footer.
	return writer.Close()
}

// writeParquetFileWithBuffer stages all rows in a parquet.Buffer first, then
// copies the buffered rows into a parquet.Writer; it verifies that the number
// of rows copied matches the number of input rows.
func writeParquetFileWithBuffer(w io.Writer, rows rows, options ...parquet.WriterOption) error {
	buffer := parquet.NewBuffer()
	for _, row := range rows {
		if err := buffer.Write(row); err != nil {
			return err
		}
	}
	writer := parquet.NewWriter(w, options...)
	numRows, err := copyRowsAndClose(writer, buffer.Rows())
	if err != nil {
		return err
	}
	if numRows != int64(len(rows)) {
		return fmt.Errorf("wrong number of rows written from buffer to file: want=%d got=%d", len(rows), numRows)
	}
	return writer.Close()
}

// rows is a generic list of values used as test inputs for parquet writers.
type rows []interface{}

// makeRows normalizes a slice of any element type into a rows value by
// reflecting over the input; a []interface{} is converted directly.
func makeRows(any interface{}) rows {
	if v, ok := any.([]interface{}); ok {
		return rows(v)
	}
	value := reflect.ValueOf(any)
	slice := make([]interface{}, value.Len())
	for i := range slice {
		slice[i] = value.Index(i).Interface()
	}
	return rows(slice)
}

// randValueFuncOf returns a generator of random parquet values matching the
// physical kind of the given parquet type.
func randValueFuncOf(t parquet.Type) func(*rand.Rand) parquet.Value {
	switch k := t.Kind(); k {
	case parquet.Boolean:
		return func(r *rand.Rand) parquet.Value {
			return parquet.ValueOf(r.Float64() < 0.5)
		}
	case parquet.Int32:
		return func(r *rand.Rand) parquet.Value {
			return parquet.ValueOf(r.Int31())
		}
	case parquet.Int64:
		return func(r *rand.Rand) parquet.Value {
			return parquet.ValueOf(r.Int63())
		}
	case parquet.Int96:
		return func(r *rand.Rand) parquet.Value {
			return parquet.ValueOf(deprecated.Int96{
				0: r.Uint32(),
				1: r.Uint32(),
				2: r.Uint32(),
			})
		}
	case parquet.Float:
		return func(r *rand.Rand) parquet.Value {
			return parquet.ValueOf(r.Float32())
		}
	case parquet.Double:
		return func(r *rand.Rand) parquet.Value {
			return parquet.ValueOf(r.Float64())
		}
	case parquet.ByteArray:
		return func(r *rand.Rand) parquet.Value {
			// Random printable strings between 1 and 49 bytes long.
			n := r.Intn(49) + 1
			b := make([]byte, n)
			const characters = "1234567890qwertyuiopasdfghjklzxcvbnm "
			for i := range b {
				b[i] = characters[r.Intn(len(characters))]
			}
			return parquet.ValueOf(b)
		}
	case parquet.FixedLenByteArray:
		// Build a [N]byte array type matching the column's fixed length.
		arrayType := reflect.ArrayOf(t.Length(), reflect.TypeOf(byte(0)))
return func(r *rand.Rand) parquet.Value { b := make([]byte, arrayType.Len()) r.Read(b) v := reflect.New(arrayType).Elem() reflect.Copy(v, reflect.ValueOf(b)) return parquet.ValueOf(v.Interface()) } default: panic("NOT IMPLEMENTED") } } func copyRowsAndClose(w parquet.RowWriter, r parquet.Rows) (int64, error) { defer r.Close() return parquet.CopyRows(w, r) } func benchmarkRowsPerSecond(b *testing.B, f func() int) { b.ResetTimer() start := time.Now() numRows := int64(0) for i := 0; i < b.N; i++ { n := f() numRows += int64(n) } seconds := time.Since(start).Seconds() b.ReportMetric(float64(numRows)/seconds, "row/s") } func generateString(r *rand.Rand, n int) string { const characters = "1234567890qwertyuiopasdfghjklzxcvbnm" b := new(strings.Builder) for i := 0; i < n; i++ { b.WriteByte(characters[r.Intn(len(characters))]) } return b.String() } var quickCheckConfig = quick.Config{ Sizes: []int{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 123, 4096 + 1, }, } func quickCheck(f interface{}) error { return quickCheckConfig.Check(f) } ================================================ FILE: print.go ================================================ package parquet import ( "errors" "fmt" "io" "strconv" "strings" "github.com/olekukonko/tablewriter" ) func PrintSchema(w io.Writer, name string, node Node) error { return PrintSchemaIndent(w, name, node, "\t", "\n") } func PrintSchemaIndent(w io.Writer, name string, node Node, pattern, newline string) error { pw := &printWriter{writer: w} pi := &printIndent{} if node.Leaf() { printSchemaWithIndent(pw, "", node, pi) } else { pw.WriteString("message ") if name == "" { pw.WriteString("{") } else { pw.WriteString(name) pw.WriteString(" {") } pi.pattern = pattern pi.newline = newline pi.repeat = 1 pi.writeNewLine(pw) for _, field := range node.Fields() { printSchemaWithIndent(pw, field.Name(), field, pi) pi.writeNewLine(pw) } pw.WriteString("}") } return pw.err } func printSchemaWithIndent(w io.StringWriter, name string, node Node, 
indent *printIndent) { indent.writeTo(w) switch { case node.Optional(): w.WriteString("optional ") case node.Repeated(): w.WriteString("repeated ") default: w.WriteString("required ") } if node.Leaf() { t := node.Type() switch t.Kind() { case Boolean: w.WriteString("boolean") case Int32: w.WriteString("int32") case Int64: w.WriteString("int64") case Int96: w.WriteString("int96") case Float: w.WriteString("float") case Double: w.WriteString("double") case ByteArray: w.WriteString("binary") case FixedLenByteArray: w.WriteString("fixed_len_byte_array(") w.WriteString(strconv.Itoa(t.Length())) w.WriteString(")") default: w.WriteString("") } if name != "" { w.WriteString(" ") w.WriteString(name) } if annotation := annotationOf(node); annotation != "" { w.WriteString(" (") w.WriteString(annotation) w.WriteString(")") } w.WriteString(";") } else { w.WriteString("group") if name != "" { w.WriteString(" ") w.WriteString(name) } if annotation := annotationOf(node); annotation != "" { w.WriteString(" (") w.WriteString(annotation) w.WriteString(")") } w.WriteString(" {") indent.writeNewLine(w) indent.push() for _, field := range node.Fields() { printSchemaWithIndent(w, field.Name(), field, indent) indent.writeNewLine(w) } indent.pop() indent.writeTo(w) w.WriteString("}") } } func annotationOf(node Node) string { if logicalType := node.Type().LogicalType(); logicalType != nil { return logicalType.String() } return "" } type printIndent struct { pattern string newline string repeat int } func (i *printIndent) push() { i.repeat++ } func (i *printIndent) pop() { i.repeat-- } func (i *printIndent) writeTo(w io.StringWriter) { if i.pattern != "" { for n := i.repeat; n > 0; n-- { w.WriteString(i.pattern) } } } func (i *printIndent) writeNewLine(w io.StringWriter) { if i.newline != "" { w.WriteString(i.newline) } } type printWriter struct { writer io.Writer err error } func (w *printWriter) Write(b []byte) (int, error) { if w.err != nil { return 0, w.err } n, err := w.writer.Write(b) 
if err != nil { w.err = err } return n, err } func (w *printWriter) WriteString(s string) (int, error) { if w.err != nil { return 0, w.err } n, err := io.WriteString(w.writer, s) if err != nil { w.err = err } return n, err } var ( _ io.StringWriter = (*printWriter)(nil) ) func sprint(name string, node Node) string { s := new(strings.Builder) PrintSchema(s, name, node) return s.String() } func PrintRowGroup(w io.Writer, rowGroup RowGroup) error { schema := rowGroup.Schema() pw := &printWriter{writer: w} tw := tablewriter.NewWriter(pw) columns := schema.Columns() header := make([]string, len(columns)) footer := make([]string, len(columns)) alignment := make([]int, len(columns)) for i, column := range columns { leaf, _ := schema.Lookup(column...) columnType := leaf.Node.Type() header[i] = strings.Join(column, ".") footer[i] = columnType.String() switch columnType.Kind() { case ByteArray: alignment[i] = tablewriter.ALIGN_LEFT default: alignment[i] = tablewriter.ALIGN_RIGHT } } rowbuf := make([]Row, defaultRowBufferSize) cells := make([]string, 0, len(columns)) rows := rowGroup.Rows() defer rows.Close() for { n, err := rows.ReadRows(rowbuf) for _, row := range rowbuf[:n] { cells = cells[:0] for _, value := range row { columnIndex := value.Column() for len(cells) <= columnIndex { cells = append(cells, "") } if cells[columnIndex] == "" { cells[columnIndex] = value.String() } else { cells[columnIndex] += "," + value.String() alignment[columnIndex] = tablewriter.ALIGN_LEFT } } tw.Append(cells) } if err != nil { if errors.Is(err, io.EOF) { break } return err } } tw.SetAutoFormatHeaders(false) tw.SetColumnAlignment(alignment) tw.SetHeaderAlignment(tablewriter.ALIGN_LEFT) tw.SetFooterAlignment(tablewriter.ALIGN_LEFT) tw.SetHeader(header) tw.SetFooter(footer) tw.Render() fmt.Fprintf(pw, "%d rows\n\n", rowGroup.NumRows()) return pw.err } func PrintColumnChunk(w io.Writer, columnChunk ColumnChunk) error { pw := &printWriter{writer: w} pw.WriteString(columnChunk.Type().String()) 
pw.WriteString("\n--------------------------------------------------------------------------------\n") values := [42]Value{} pages := columnChunk.Pages() numPages, numValues := int64(0), int64(0) defer pages.Close() for { p, err := pages.ReadPage() if err != nil { if !errors.Is(err, io.EOF) { return err } break } numPages++ n := p.NumValues() if n == 0 { fmt.Fprintf(pw, "*** page %d, no values ***\n", numPages) } else { fmt.Fprintf(pw, "*** page %d, values %d to %d ***\n", numPages, numValues+1, numValues+n) printPage(w, p, values[:], numValues+1) numValues += n } pw.WriteString("\n") } return pw.err } func PrintPage(w io.Writer, page Page) error { return printPage(w, page, make([]Value, 42), 0) } func printPage(w io.Writer, page Page, values []Value, numValues int64) error { r := page.Values() for { n, err := r.ReadValues(values[:]) for i, v := range values[:n] { _, err := fmt.Fprintf(w, "value %d: %+v\n", numValues+int64(i), v) if err != nil { return err } } if err != nil { if errors.Is(err, io.EOF) { err = nil } return err } } } ================================================ FILE: print_test.go ================================================ package parquet_test import ( "strings" "testing" "github.com/segmentio/parquet-go" ) func TestPrintSchema(t *testing.T) { tests := []struct { node parquet.Node print string }{ { node: parquet.Group{"on": parquet.Leaf(parquet.BooleanType)}, print: `message Test { required boolean on; }`, }, { node: parquet.Group{"name": parquet.String()}, print: `message Test { required binary name (STRING); }`, }, { node: parquet.Group{"uuid": parquet.UUID()}, print: `message Test { required fixed_len_byte_array(16) uuid (UUID); }`, }, { node: parquet.Group{"enum": parquet.Enum()}, print: `message Test { required binary enum (ENUM); }`, }, { node: parquet.Group{"json": parquet.JSON()}, print: `message Test { required binary json (JSON); }`, }, { node: parquet.Group{"bson": parquet.BSON()}, print: `message Test { required binary bson 
(BSON); }`, }, { node: parquet.Group{"name": parquet.Optional(parquet.String())}, print: `message Test { optional binary name (STRING); }`, }, { node: parquet.Group{"name": parquet.Repeated(parquet.String())}, print: `message Test { repeated binary name (STRING); }`, }, { node: parquet.Group{"age": parquet.Int(8)}, print: `message Test { required int32 age (INT(8,true)); }`, }, { node: parquet.Group{"age": parquet.Int(16)}, print: `message Test { required int32 age (INT(16,true)); }`, }, { node: parquet.Group{"age": parquet.Int(32)}, print: `message Test { required int32 age (INT(32,true)); }`, }, { node: parquet.Group{"age": parquet.Int(64)}, print: `message Test { required int64 age (INT(64,true)); }`, }, { node: parquet.Group{"age": parquet.Uint(8)}, print: `message Test { required int32 age (INT(8,false)); }`, }, { node: parquet.Group{"age": parquet.Uint(16)}, print: `message Test { required int32 age (INT(16,false)); }`, }, { node: parquet.Group{"age": parquet.Uint(32)}, print: `message Test { required int32 age (INT(32,false)); }`, }, { node: parquet.Group{"age": parquet.Uint(64)}, print: `message Test { required int64 age (INT(64,false)); }`, }, { node: parquet.Group{"ratio": parquet.Leaf(parquet.FloatType)}, print: `message Test { required float ratio; }`, }, { node: parquet.Group{"ratio": parquet.Leaf(parquet.DoubleType)}, print: `message Test { required double ratio; }`, }, { node: parquet.Group{"cost": parquet.Decimal(0, 9, parquet.Int32Type)}, print: `message Test { required int32 cost (DECIMAL(9,0)); }`, }, { node: parquet.Group{"cost": parquet.Decimal(0, 18, parquet.Int64Type)}, print: `message Test { required int64 cost (DECIMAL(18,0)); }`, }, { node: parquet.Group{"date": parquet.Date()}, print: `message Test { required int32 date (DATE); }`, }, { node: parquet.Group{"time": parquet.Time(parquet.Millisecond)}, print: `message Test { required int32 time (TIME(isAdjustedToUTC=true,unit=MILLIS)); }`, }, { node: parquet.Group{"time": 
parquet.Time(parquet.Microsecond)}, print: `message Test { required int64 time (TIME(isAdjustedToUTC=true,unit=MICROS)); }`, }, { node: parquet.Group{"time": parquet.Time(parquet.Nanosecond)}, print: `message Test { required int64 time (TIME(isAdjustedToUTC=true,unit=NANOS)); }`, }, { node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Millisecond)}, print: `message Test { required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)); }`, }, { node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Microsecond)}, print: `message Test { required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS)); }`, }, { node: parquet.Group{"timestamp": parquet.Timestamp(parquet.Nanosecond)}, print: `message Test { required int64 timestamp (TIMESTAMP(isAdjustedToUTC=true,unit=NANOS)); }`, }, { node: parquet.Group{"names": parquet.List(parquet.String())}, print: `message Test { required group names (LIST) { repeated group list { required binary element (STRING); } } }`, }, { node: parquet.Group{ "keys": parquet.List( parquet.Group{ "key": parquet.String(), "value": parquet.String(), }, ), }, print: `message Test { required group keys (LIST) { repeated group list { required group element { required binary key (STRING); required binary value (STRING); } } } }`, }, { node: parquet.Group{ "pairs": parquet.Map( parquet.String(), parquet.String(), ), }, print: `message Test { required group pairs (MAP) { repeated group key_value { required binary key (STRING); required binary value (STRING); } } }`, }, } for _, test := range tests { t.Run("", func(t *testing.T) { buf := new(strings.Builder) if err := parquet.PrintSchema(buf, "Test", test.node); err != nil { t.Fatal(err) } if buf.String() != test.print { t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, buf) } }) } } ================================================ FILE: reader.go ================================================ package parquet import ( "errors" "fmt" "io" "reflect" ) // Deprecated: 
A Reader reads Go values from parquet files. // // This example showcases a typical use of parquet readers: // // reader := parquet.NewReader(file) // rows := []RowType{} // for { // row := RowType{} // err := reader.Read(&row) // if err != nil { // if err == io.EOF { // break // } // ... // } // rows = append(rows, row) // } // if err := reader.Close(); err != nil { // ... // } // // For programs building with Go 1.18 or later, the GenericReader[T] type // supersedes this one. type Reader struct { seen reflect.Type file reader read reader rowIndex int64 rowbuf []Row } // NewReader constructs a parquet reader reading rows from the given // io.ReaderAt. // // In order to read parquet rows, the io.ReaderAt must be converted to a // parquet.File. If r is already a parquet.File it is used directly; otherwise, // the io.ReaderAt value is expected to either have a `Size() int64` method or // implement io.Seeker in order to determine its size. // // The function panics if the reader configuration is invalid. Programs that // cannot guarantee the validity of the options passed to NewReader should // construct the reader configuration independently prior to calling this // function: // // config, err := parquet.NewReaderConfig(options...) // if err != nil { // // handle the configuration error // ... // } else { // // this call to create a reader is guaranteed not to panic // reader := parquet.NewReader(input, config) // ... // } func NewReader(input io.ReaderAt, options ...ReaderOption) *Reader { c, err := NewReaderConfig(options...) 
if err != nil {
		panic(err)
	}
	f, err := openFile(input)
	if err != nil {
		panic(err)
	}
	r := &Reader{
		file: reader{
			schema:   f.schema,
			rowGroup: fileRowGroupOf(f),
		},
	}
	if c.Schema != nil {
		// An explicit schema in the reader configuration takes precedence over
		// the file schema; install a converting row group view to match it.
		r.file.schema = c.Schema
		r.file.rowGroup = convertRowGroupTo(r.file.rowGroup, c.Schema)
	}
	r.read.init(r.file.schema, r.file.rowGroup)
	return r
}

// openFile returns input as-is when it already is a *File; otherwise it
// determines the size of the underlying reader and opens it as a parquet file.
func openFile(input io.ReaderAt) (*File, error) {
	f, _ := input.(*File)
	if f != nil {
		return f, nil
	}
	n, err := sizeOf(input)
	if err != nil {
		return nil, err
	}
	return OpenFile(input, n)
}

// fileRowGroupOf returns a single RowGroup view over all row groups of f: an
// empty row group when the file has none, the row group itself when there is
// exactly one, and a multi-row-group view otherwise.
func fileRowGroupOf(f *File) RowGroup {
	switch rowGroups := f.RowGroups(); len(rowGroups) {
	case 0:
		return newEmptyRowGroup(f.Schema())
	case 1:
		return rowGroups[0]
	default:
		// TODO: should we attempt to merge the row groups via MergeRowGroups
		// to preserve the global order of sorting columns within the file?
		return newMultiRowGroup(f.config.ReadMode, rowGroups...)
	}
}

// NewRowGroupReader constructs a new Reader which reads rows from the RowGroup
// passed as argument.
//
// Like NewReader, it panics if the reader configuration is invalid.
func NewRowGroupReader(rowGroup RowGroup, options ...ReaderOption) *Reader {
	c, err := NewReaderConfig(options...)
	if err != nil {
		panic(err)
	}
	if c.Schema != nil {
		rowGroup = convertRowGroupTo(rowGroup, c.Schema)
	}
	r := &Reader{
		file: reader{
			schema:   rowGroup.Schema(),
			rowGroup: rowGroup,
		},
	}
	r.read.init(r.file.schema, r.file.rowGroup)
	return r
}

// convertRowGroupTo wraps rowGroup in a converting view when its schema does
// not match the requested schema; otherwise the row group is returned as-is.
func convertRowGroupTo(rowGroup RowGroup, schema *Schema) RowGroup {
	if rowGroupSchema := rowGroup.Schema(); !nodesAreEqual(schema, rowGroupSchema) {
		conv, err := Convert(schema, rowGroupSchema)
		if err != nil {
			// TODO: this looks like something we should not be panicking on,
			// but the current NewReader API does not offer a mechanism to
			// report errors.
panic(err)
		}
		rowGroup = ConvertRowGroup(rowGroup, conv)
	}
	return rowGroup
}

// sizeOf determines the byte size of r: it prefers a Size method when one is
// available, and otherwise falls back to seeking to the end of the stream
// (restoring the original offset afterwards).
func sizeOf(r io.ReaderAt) (int64, error) {
	switch f := r.(type) {
	case interface{ Size() int64 }:
		return f.Size(), nil
	case io.Seeker:
		off, err := f.Seek(0, io.SeekCurrent)
		if err != nil {
			return 0, err
		}
		end, err := f.Seek(0, io.SeekEnd)
		if err != nil {
			return 0, err
		}
		// Restore the original position so the caller's cursor is unchanged.
		_, err = f.Seek(off, io.SeekStart)
		return end, err
	default:
		return 0, fmt.Errorf("cannot determine length of %T", r)
	}
}

// Reset repositions the reader at the beginning of the underlying parquet file.
func (r *Reader) Reset() {
	r.file.Reset()
	r.read.Reset()
	r.rowIndex = 0
	clearRows(r.rowbuf)
}

// Read reads the next row from r. The type of the row must match the schema
// of the underlying parquet file or an error will be returned.
//
// The method returns io.EOF when no more rows can be read from r.
func (r *Reader) Read(row interface{}) error {
	if rowType := dereference(reflect.TypeOf(row)); rowType.Kind() == reflect.Struct {
		// Lazily (re)build the read path whenever the destination Go type
		// differs from the one previously seen.
		if r.seen != rowType {
			if err := r.updateReadSchema(rowType); err != nil {
				return fmt.Errorf("cannot read parquet row into go value of type %T: %w", row, err)
			}
		}
	}
	if err := r.read.SeekToRow(r.rowIndex); err != nil {
		// The underlying reader reports io.ErrClosedPipe once closed; surface
		// it as io.EOF so callers observe a normal end of stream.
		if errors.Is(err, io.ErrClosedPipe) {
			return io.EOF
		}
		return fmt.Errorf("seeking reader to row %d: %w", r.rowIndex, err)
	}
	// Reuse the one-row scratch buffer across calls when possible.
	if cap(r.rowbuf) == 0 {
		r.rowbuf = make([]Row, 1)
	} else {
		r.rowbuf = r.rowbuf[:1]
	}
	n, err := r.read.ReadRows(r.rowbuf[:])
	if n == 0 {
		return err
	}
	r.rowIndex++
	return r.read.schema.Reconstruct(row, r.rowbuf[0])
}

// updateReadSchema installs a read path for the given Go struct type, adding a
// conversion layer when the type's schema differs from the file schema.
func (r *Reader) updateReadSchema(rowType reflect.Type) error {
	schema := schemaOf(rowType)
	if nodesAreEqual(schema, r.file.schema) {
		r.read.init(schema, r.file.rowGroup)
	} else {
		conv, err := Convert(schema, r.file.schema)
		if err != nil {
			return err
		}
		r.read.init(schema, ConvertRowGroup(r.file.rowGroup, conv))
	}
	// Remember the type so the next Read with the same type skips this work.
	r.seen = rowType
	return nil
}

// ReadRows reads the next rows from r into the given Row buffer.
//
// The returned values are laid out in the order expected by the
// parquet.(*Schema).Reconstruct method.
//
// The method returns io.EOF when no more rows can be read from r.
func (r *Reader) ReadRows(rows []Row) (int, error) {
	if err := r.file.SeekToRow(r.rowIndex); err != nil {
		return 0, err
	}
	n, err := r.file.ReadRows(rows)
	// Advance the cursor by the number of rows actually read so a subsequent
	// call resumes where this one stopped.
	r.rowIndex += int64(n)
	return n, err
}

// Schema returns the schema of rows read by r.
func (r *Reader) Schema() *Schema { return r.file.schema }

// NumRows returns the number of rows that can be read from r.
func (r *Reader) NumRows() int64 { return r.file.rowGroup.NumRows() }

// SeekToRow positions r at the given row index.
func (r *Reader) SeekToRow(rowIndex int64) error {
	if err := r.file.SeekToRow(rowIndex); err != nil {
		return err
	}
	r.rowIndex = rowIndex
	return nil
}

// Close closes the reader, preventing more rows from being read.
func (r *Reader) Close() error {
	if err := r.read.Close(); err != nil {
		return err
	}
	if err := r.file.Close(); err != nil {
		return err
	}
	return nil
}

// reader is a subtype used in the implementation of Reader to support the two
// use cases of either reading rows calling the ReadRow method (where full rows
// are read from the underlying parquet file), or calling the Read method to
// read rows into Go values, potentially doing partial reads on a subset of the
// columns due to using a converted row group view.
type reader struct {
	schema   *Schema  // schema the rows are presented with
	rowGroup RowGroup // source of rows, possibly a converted view
	rows     Rows     // lazily opened row cursor over rowGroup
	rowIndex int64    // index of the next row to read
}

// init installs the schema and row group, then resets the read position.
func (r *reader) init(schema *Schema, rowGroup RowGroup) {
	r.schema = schema
	r.rowGroup = rowGroup
	r.Reset()
}

// Reset rewinds the reader to the first row, reusing the open Rows cursor when
// it supports in-place resets and closing it otherwise.
func (r *reader) Reset() {
	r.rowIndex = 0
	if rows, ok := r.rows.(interface{ Reset() }); ok {
		// This optimization works for the common case where the underlying type
		// of the Rows instance is rowGroupRows, which should be true in most
		// cases since even external implementations of the RowGroup interface
		// can construct values of this type via the NewRowGroupRowReader
		// function.
// // Foreign implementations of the Rows interface may also define a Reset // method in order to participate in this optimization. rows.Reset() return } if r.rows != nil { r.rows.Close() r.rows = nil } } func (r *reader) ReadRows(rows []Row) (int, error) { if r.rowGroup == nil { return 0, io.EOF } if r.rows == nil { r.rows = r.rowGroup.Rows() if r.rowIndex > 0 { if err := r.rows.SeekToRow(r.rowIndex); err != nil { return 0, err } } } n, err := r.rows.ReadRows(rows) r.rowIndex += int64(n) return n, err } func (r *reader) SeekToRow(rowIndex int64) error { if r.rowGroup == nil { return io.ErrClosedPipe } if rowIndex != r.rowIndex { if r.rows != nil { if err := r.rows.SeekToRow(rowIndex); err != nil { return err } } r.rowIndex = rowIndex } return nil } func (r *reader) Close() (err error) { r.rowGroup = nil if r.rows != nil { err = r.rows.Close() } return err } var ( _ Rows = (*Reader)(nil) _ RowReaderWithSchema = (*Reader)(nil) _ RowReader = (*reader)(nil) _ RowSeeker = (*reader)(nil) ) ================================================ FILE: reader_go18.go ================================================ //go:build go1.18 package parquet import ( "io" "reflect" ) // GenericReader is similar to a Reader but uses a type parameter to define the // Go type representing the schema of rows being read. // // See GenericWriter for details about the benefits over the classic Reader API. type GenericReader[T any] struct { base Reader read readFunc[T] } // NewGenericReader is like NewReader but returns GenericReader[T] suited to write // rows of Go type T. // // The type parameter T should be a map, struct, or any. Any other types will // cause a panic at runtime. Type checking is a lot more effective when the // generic parameter is a struct type, using map and interface types is somewhat // similar to using a Writer. // // If the option list may explicitly declare a schema, it must be compatible // with the schema generated from T. 
func NewGenericReader[T any](input io.ReaderAt, options ...ReaderOption) *GenericReader[T] { c, err := NewReaderConfig(options...) if err != nil { panic(err) } f, err := openFile(input) if err != nil { panic(err) } rowGroup := fileRowGroupOf(f) t := typeOf[T]() if c.Schema == nil { if t == nil { c.Schema = rowGroup.Schema() } else { c.Schema = schemaOf(dereference(t)) } } r := &GenericReader[T]{ base: Reader{ file: reader{ schema: c.Schema, rowGroup: rowGroup, }, }, } if !nodesAreEqual(c.Schema, f.schema) { r.base.file.rowGroup = convertRowGroupTo(r.base.file.rowGroup, c.Schema) } r.base.read.init(r.base.file.schema, r.base.file.rowGroup) r.read = readFuncOf[T](t, r.base.file.schema) return r } func NewGenericRowGroupReader[T any](rowGroup RowGroup, options ...ReaderOption) *GenericReader[T] { c, err := NewReaderConfig(options...) if err != nil { panic(err) } t := typeOf[T]() if c.Schema == nil { if t == nil { c.Schema = rowGroup.Schema() } else { c.Schema = schemaOf(dereference(t)) } } r := &GenericReader[T]{ base: Reader{ file: reader{ schema: c.Schema, rowGroup: rowGroup, }, }, } if !nodesAreEqual(c.Schema, rowGroup.Schema()) { r.base.file.rowGroup = convertRowGroupTo(r.base.file.rowGroup, c.Schema) } r.base.read.init(r.base.file.schema, r.base.file.rowGroup) r.read = readFuncOf[T](t, r.base.file.schema) return r } func (r *GenericReader[T]) Reset() { r.base.Reset() } // Read reads the next rows from the reader into the given rows slice up to len(rows). // // The returned values are safe to reuse across Read calls and do not share // memory with the reader's underlying page buffers. // // The method returns the number of rows read and io.EOF when no more rows // can be read from the reader. 
func (r *GenericReader[T]) Read(rows []T) (int, error) {
	return r.read(r, rows)
}

// ReadRows reads the next rows in their low-level Row representation,
// delegating to the embedded base Reader.
func (r *GenericReader[T]) ReadRows(rows []Row) (int, error) {
	return r.base.ReadRows(rows)
}

// Schema returns the schema of rows read by r.
func (r *GenericReader[T]) Schema() *Schema {
	return r.base.Schema()
}

// NumRows returns the number of rows available from the reader.
func (r *GenericReader[T]) NumRows() int64 {
	return r.base.NumRows()
}

// SeekToRow positions the reader on the given row index.
func (r *GenericReader[T]) SeekToRow(rowIndex int64) error {
	return r.base.SeekToRow(rowIndex)
}

// Close closes the underlying reader.
func (r *GenericReader[T]) Close() error {
	return r.base.Close()
}

// readRows reads the next rows from the reader into the given rows slice up to len(rows).
//
// The returned values are safe to reuse across readRows calls and do not share
// memory with the reader's underlying page buffers.
//
// The method returns the number of rows read and io.EOF when no more rows
// can be read from the reader.
func (r *GenericReader[T]) readRows(rows []T) (int, error) {
	nRequest := len(rows)
	// Size the shared Row buffer to the request; capacity is retained across
	// calls so steady-state reads do not allocate.
	if cap(r.base.rowbuf) < nRequest {
		r.base.rowbuf = make([]Row, nRequest)
	} else {
		r.base.rowbuf = r.base.rowbuf[:nRequest]
	}

	var n, nTotal int
	var err error
	for {
		// ReadRows reads the minimum remaining rows in a column page across all columns
		// of the underlying reader, unless the length of the slice passed to it is smaller.
		// In that case, ReadRows will read the number of rows equal to the length of the
		// given slice argument. We limit that length to never be more than requested
		// because sequential reads can cross page boundaries.
		n, err = r.base.ReadRows(r.base.rowbuf[:nRequest-nTotal])
		if n > 0 {
			schema := r.base.Schema()
			// Reconstruct each low-level Row into the caller's typed slice;
			// on failure, report how many rows were successfully produced.
			for i, row := range r.base.rowbuf[:n] {
				if err2 := schema.Reconstruct(&rows[nTotal+i], row); err2 != nil {
					return nTotal + i, err2
				}
			}
		}
		nTotal += n
		if n == 0 || nTotal == nRequest || err != nil {
			break
		}
	}
	return nTotal, err
}

var (
	_ Rows                = (*GenericReader[any])(nil)
	_ RowReaderWithSchema = (*Reader)(nil)

	_ Rows                = (*GenericReader[struct{}])(nil)
	_ RowReaderWithSchema = (*GenericReader[struct{}])(nil)

	_ Rows                = (*GenericReader[map[struct{}]struct{}])(nil)
	_ RowReaderWithSchema = (*GenericReader[map[struct{}]struct{}])(nil)
)

// readFunc is the typed read entry point selected at construction time based
// on the Go kind of T.
type readFunc[T any] func(*GenericReader[T], []T) (int, error)

// readFuncOf validates that T is a supported kind (nil/interface, map, struct,
// or pointer to struct) and returns the readRows method; any other kind
// panics, matching the contract documented on NewGenericReader.
func readFuncOf[T any](t reflect.Type, schema *Schema) readFunc[T] {
	if t == nil {
		return (*GenericReader[T]).readRows
	}
	switch t.Kind() {
	case reflect.Interface, reflect.Map:
		return (*GenericReader[T]).readRows

	case reflect.Struct:
		return (*GenericReader[T]).readRows

	case reflect.Pointer:
		if e := t.Elem(); e.Kind() == reflect.Struct {
			return (*GenericReader[T]).readRows
		}
	}
	panic("cannot create reader for values of type " + t.String())
}

================================================
FILE: reader_go18_test.go
================================================
//go:build go1.18

package parquet_test

import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"os"
	"reflect"
	"testing"

	"github.com/segmentio/parquet-go"
)

func TestGenericReader(t *testing.T) {
	testGenericReader[booleanColumn](t)
	testGenericReader[int32Column](t)
	testGenericReader[int64Column](t)
	testGenericReader[int96Column](t)
	testGenericReader[floatColumn](t)
	testGenericReader[doubleColumn](t)
	testGenericReader[byteArrayColumn](t)
	testGenericReader[fixedLenByteArrayColumn](t)
	testGenericReader[stringColumn](t)
	testGenericReader[indexedStringColumn](t)
	testGenericReader[uuidColumn](t)
	testGenericReader[timeColumn](t)
	testGenericReader[timeInMillisColumn](t)
	testGenericReader[mapColumn](t)
	testGenericReader[decimalColumn](t)
testGenericReader[addressBook](t) testGenericReader[contact](t) testGenericReader[listColumn2](t) testGenericReader[listColumn1](t) testGenericReader[listColumn0](t) testGenericReader[nestedListColumn1](t) testGenericReader[nestedListColumn](t) testGenericReader[*contact](t) testGenericReader[paddedBooleanColumn](t) testGenericReader[optionalInt32Column](t) testGenericReader[repeatedInt32Column](t) } func testGenericReader[Row any](t *testing.T) { var model Row t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) { err := quickCheck(func(rows []Row) bool { if len(rows) == 0 { return true // TODO: fix support for parquet files with zero rows } if err := testGenericReaderRows(rows); err != nil { t.Error(err) return false } return true }) if err != nil { t.Error(err) } }) } func testGenericReaderRows[Row any](rows []Row) error { setNullPointers(rows) buffer := new(bytes.Buffer) writer := parquet.NewGenericWriter[Row](buffer) _, err := writer.Write(rows) if err != nil { return err } if err := writer.Close(); err != nil { return err } reader := parquet.NewGenericReader[Row](bytes.NewReader(buffer.Bytes())) result := make([]Row, len(rows)) n, err := reader.Read(result) if err != nil && !errors.Is(err, io.EOF) { return err } if n < len(rows) { return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n) } if !reflect.DeepEqual(rows, result) { return fmt.Errorf("rows mismatch:\nwant: %+v\ngot: %+v", rows, result) } return nil } func TestIssue400(t *testing.T) { type B struct { Name string } type A struct { B []B `parquet:",optional"` } b := new(bytes.Buffer) w := parquet.NewGenericWriter[A](b) expect := []A{ { B: []B{ { // 32 bytes random so we can see in the binary parquet if we // actually wrote the value Name: "9e7eb1f0-bbcc-43ec-bfad-a9fac1bb0feb", }, }, }, } _, err := w.Write(expect) if err != nil { t.Fatal(err) } if err = w.Close(); err != nil { t.Fatal(err) } r := parquet.NewGenericReader[A](bytes.NewReader(b.Bytes())) values := make([]A, 1) _, 
err = r.Read(values) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(expect[0], values[0]) { t.Errorf("want %q got %q", values[0], expect[0]) } } func TestReadMinPageSize(t *testing.T) { // NOTE: min page size is 307 for MyRow schema t.Run("test read less than min page size", func(t *testing.T) { testReadMinPageSize(128, t) }) t.Run("test read equal to min page size", func(t *testing.T) { testReadMinPageSize(307, t) }) t.Run("test read more than min page size", func(t *testing.T) { testReadMinPageSize(384, t) }) // NOTE: num rows is 20,000 t.Run("test read equal to num rows", func(t *testing.T) { testReadMinPageSize(20_000, t) }) t.Run("test read more than num rows", func(t *testing.T) { testReadMinPageSize(25_000, t) }) } func testReadMinPageSize(readSize int, t *testing.T) { type MyRow struct { ID [16]byte `parquet:"id,delta,uuid"` File string `parquet:"file,dict,zstd"` Index int64 `parquet:"index,delta,zstd"` } numRows := 20_000 maxPageBytes := 5000 tmp, err := os.CreateTemp("/tmp", "*.parquet") if err != nil { t.Fatal("os.CreateTemp: ", err) } path := tmp.Name() defer os.Remove(path) t.Log("file:", path) // The page buffer size ensures we get multiple pages out of this example. w := parquet.NewGenericWriter[MyRow](tmp, parquet.PageBufferSize(maxPageBytes)) // Need to write 1 row at a time here as writing many at once disregards PageBufferSize option. for i := 0; i < numRows; i++ { row := MyRow{ ID: [16]byte{15: byte(i)}, File: "hi" + fmt.Sprint(i), Index: int64(i), } _, err := w.Write([]MyRow{row}) if err != nil { t.Fatal("w.Write: ", err) } // Flush writes rows as row group. 4 total (20k/5k) in this file. 
if (i+1)%maxPageBytes == 0 { err = w.Flush() if err != nil { t.Fatal("w.Flush: ", err) } } } err = w.Close() if err != nil { t.Fatal("w.Close: ", err) } err = tmp.Close() if err != nil { t.Fatal("tmp.Close: ", err) } file, err := os.Open(path) if err != nil { t.Fatal("os.Open", err) } reader := parquet.NewGenericReader[MyRow](file) read := int64(0) nRows := reader.NumRows() rows := make([]MyRow, 0, nRows) buf := make([]MyRow, readSize) // NOTE: min page size is 307 for MyRow schema for read < nRows { num, err := reader.Read(buf) read += int64(num) if err != nil && !errors.Is(err, io.EOF) { t.Fatal("Read:", err) } rows = append(rows, buf...) } if err := reader.Close(); err != nil { t.Fatal("Close", err) } if len(rows) < numRows { t.Fatalf("not enough values were read: want=%d got=%d", len(rows), numRows) } for i, row := range rows[:numRows] { id := [16]byte{15: byte(i)} file := "hi" + fmt.Sprint(i) index := int64(i) if row.ID != id || row.File != file || row.Index != index { t.Fatalf("rows mismatch at index: %d got: %+v", i, row) } } } func BenchmarkGenericReader(b *testing.B) { benchmarkGenericReader[benchmarkRowType](b) benchmarkGenericReader[booleanColumn](b) benchmarkGenericReader[int32Column](b) benchmarkGenericReader[int64Column](b) benchmarkGenericReader[floatColumn](b) benchmarkGenericReader[doubleColumn](b) benchmarkGenericReader[byteArrayColumn](b) benchmarkGenericReader[fixedLenByteArrayColumn](b) benchmarkGenericReader[stringColumn](b) benchmarkGenericReader[indexedStringColumn](b) benchmarkGenericReader[uuidColumn](b) benchmarkGenericReader[timeColumn](b) benchmarkGenericReader[timeInMillisColumn](b) benchmarkGenericReader[mapColumn](b) benchmarkGenericReader[decimalColumn](b) benchmarkGenericReader[contact](b) benchmarkGenericReader[paddedBooleanColumn](b) benchmarkGenericReader[optionalInt32Column](b) } func benchmarkGenericReader[Row generator[Row]](b *testing.B) { var model Row b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) { prng := 
rand.New(rand.NewSource(0)) rows := make([]Row, benchmarkNumRows) for i := range rows { rows[i] = rows[i].generate(prng) } rowbuf := make([]Row, benchmarkRowsPerStep) buffer := parquet.NewGenericBuffer[Row]() buffer.Write(rows) b.Run("go1.17", func(b *testing.B) { reader := parquet.NewRowGroupReader(buffer) benchmarkRowsPerSecond(b, func() int { for i := range rowbuf { if err := reader.Read(&rowbuf[i]); err != nil { if err != io.EOF { b.Fatal(err) } else { reader.Reset() } } } return len(rowbuf) }) }) b.Run("go1.18", func(b *testing.B) { reader := parquet.NewGenericRowGroupReader[Row](buffer) benchmarkRowsPerSecond(b, func() int { n, err := reader.Read(rowbuf) if err != nil { if err != io.EOF { b.Fatal(err) } else { reader.Reset() } } return n }) }) }) } ================================================ FILE: reader_test.go ================================================ package parquet_test import ( "bytes" "fmt" "io" "math" "math/rand" "reflect" "testing" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/internal/quick" ) func rowsOf(numRows int, model interface{}) rows { prng := rand.New(rand.NewSource(0)) return randomRowsOf(prng, numRows, model) } func randomRowsOf(prng *rand.Rand, numRows int, model interface{}) rows { typ := reflect.TypeOf(model) rows := make(rows, numRows) makeValue := quick.MakeValueFuncOf(typ) for i := range rows { v := reflect.New(typ).Elem() makeValue(v, prng) rows[i] = v.Interface() } return rows } var readerTests = []struct { scenario string model interface{} }{ { scenario: "BOOLEAN", model: booleanColumn{}, }, { scenario: "INT32", model: int32Column{}, }, { scenario: "INT64", model: int64Column{}, }, { scenario: "INT96", model: int96Column{}, }, { scenario: "FLOAT", model: floatColumn{}, }, { scenario: "DOUBLE", model: doubleColumn{}, }, { scenario: "BYTE_ARRAY", model: byteArrayColumn{}, }, { scenario: "FIXED_LEN_BYTE_ARRAY", model: fixedLenByteArrayColumn{}, }, { scenario: "STRING", model: stringColumn{}, }, { 
scenario: "STRING (dict)", model: indexedStringColumn{}, }, { scenario: "UUID", model: uuidColumn{}, }, { scenario: "time.Time", model: timeColumn{}, }, { scenario: "time.Time in ms", model: timeInMillisColumn{}, }, { scenario: "DECIMAL", model: decimalColumn{}, }, { scenario: "AddressBook", model: addressBook{}, }, { scenario: "one optional level", model: listColumn2{}, }, { scenario: "one repeated level", model: listColumn1{}, }, { scenario: "two repeated levels", model: listColumn0{}, }, { scenario: "three repeated levels", model: listColumn0{}, }, { scenario: "nested lists", model: nestedListColumn{}, }, { scenario: "key-value pairs", model: struct { KeyValuePairs map[utf8string]utf8string }{}, }, { scenario: "multiple key-value pairs", model: struct { KeyValuePairs0 map[utf8string]utf8string KeyValuePairs1 map[utf8string]utf8string KeyValuePairs2 map[utf8string]utf8string }{}, }, { scenario: "repeated key-value pairs", model: struct { RepeatedKeyValuePairs []map[utf8string]utf8string }{}, }, { scenario: "map of repeated values", model: struct { MapOfRepeated map[utf8string][]utf8string }{}, }, } func TestReader(t *testing.T) { buf := new(bytes.Buffer) file := bytes.NewReader(nil) for _, test := range readerTests { t.Run(test.scenario, func(t *testing.T) { const N = 42 rowType := reflect.TypeOf(test.model) rowPtr := reflect.New(rowType) rowZero := reflect.Zero(rowType) rowValue := rowPtr.Elem() for n := 1; n < N; n++ { t.Run(fmt.Sprintf("N=%d", n), func(t *testing.T) { defer buf.Reset() rows := rowsOf(n, test.model) if err := writeParquetFileWithBuffer(buf, rows); err != nil { t.Fatal(err) } file.Reset(buf.Bytes()) r := parquet.NewReader(file, parquet.SchemaOf(test.model)) for i, v := range rows { if err := r.Read(rowPtr.Interface()); err != nil { t.Fatal(err) } if !reflect.DeepEqual(rowValue.Interface(), v) { t.Errorf("row mismatch at index %d\nwant = %+v\ngot = %+v", i, v, rowValue.Interface()) } rowValue.Set(rowZero) } if err := r.Read(rowPtr.Interface()); 
err != io.EOF { t.Errorf("expected EOF after reading all values but got: %v", err) } }) } }) } } func BenchmarkReaderReadType(b *testing.B) { buf := new(bytes.Buffer) file := bytes.NewReader(nil) for _, test := range readerTests { b.Run(test.scenario, func(b *testing.B) { defer buf.Reset() rows := rowsOf(benchmarkNumRows, test.model) if err := writeParquetFile(buf, rows); err != nil { b.Fatal(err) } file.Reset(buf.Bytes()) f, err := parquet.OpenFile(file, file.Size()) if err != nil { b.Fatal(err) } rowType := reflect.TypeOf(test.model) rowPtr := reflect.New(rowType) rowZero := reflect.Zero(rowType) rowValue := rowPtr.Elem() r := parquet.NewReader(f) p := rowPtr.Interface() benchmarkRowsPerSecond(b, func() (n int) { for i := 0; i < benchmarkRowsPerStep; i++ { if err := r.Read(p); err != nil { if err == io.EOF { r.Reset() } else { b.Fatal(err) } } } rowValue.Set(rowZero) return benchmarkRowsPerStep }) b.SetBytes(int64(math.Ceil(benchmarkRowsPerStep * float64(file.Size()) / benchmarkNumRows))) }) } } func BenchmarkReaderReadRow(b *testing.B) { buf := new(bytes.Buffer) file := bytes.NewReader(nil) for _, test := range readerTests { b.Run(test.scenario, func(b *testing.B) { defer buf.Reset() rows := rowsOf(benchmarkNumRows, test.model) if err := writeParquetFile(buf, rows); err != nil { b.Fatal(err) } file.Reset(buf.Bytes()) f, err := parquet.OpenFile(file, file.Size()) if err != nil { b.Fatal(err) } r := parquet.NewReader(f) rowbuf := make([]parquet.Row, benchmarkRowsPerStep) benchmarkRowsPerSecond(b, func() int { n, err := r.ReadRows(rowbuf) if err != nil { if err == io.EOF { r.Reset() } else { b.Fatal(err) } } return n }) b.SetBytes(int64(math.Ceil(benchmarkRowsPerStep * float64(file.Size()) / benchmarkNumRows))) }) } } func TestReaderReadSubset(t *testing.T) { // In this example we'll write 3 columns to the file - X, Y, and Z, but // we'll only read out the X and Y columns. Returns true if all writes // and reads were successful, and false otherwise. 
type Point3D struct{ X, Y, Z int64 } type Point2D struct{ X, Y int64 } err := quickCheck(func(points3D []Point3D) bool { if len(points3D) == 0 { return true } buf := new(bytes.Buffer) err := writeParquetFile(buf, makeRows(points3D)) if err != nil { t.Error(err) return false } reader := parquet.NewReader(bytes.NewReader(buf.Bytes())) for i := 0; ; i++ { row := Point2D{} err := reader.Read(&row) if err != nil { if err == io.EOF && i == len(points3D) { break } t.Error(err) return false } if row != (Point2D{X: points3D[i].X, Y: points3D[i].Y}) { t.Errorf("points mismatch at row index %d: want=%v got=%v", i, points3D[i], row) return false } } return true }) if err != nil { t.Error(err) } } func TestReaderSeekToRow(t *testing.T) { type rowType struct { Name utf8string `parquet:",dict"` } rows := rowsOf(10, rowType{}) buf := new(bytes.Buffer) err := writeParquetFile(buf, rows) if err != nil { t.Fatal(err) } reader := parquet.NewReader(bytes.NewReader(buf.Bytes())) for i := 0; i < 10; i++ { if err := reader.SeekToRow(int64(i)); err != nil { t.Fatalf("seek to row %d: %v", i, err) } row := new(rowType) err := reader.Read(row) if err != nil { t.Fatalf("reading row %d: %v", i, err) } if *row != rows[i] { t.Fatalf("row %d mismatch: got=%+v want=%+v", i, *row, rows[i]) } } } func TestSeekToRowNoDict(t *testing.T) { type rowType struct { Name utf8string `parquet:","` // no dictionary encoding } // write samples to in-memory buffer buf := new(bytes.Buffer) schema := parquet.SchemaOf(new(rowType)) w := parquet.NewWriter(buf, schema) sample := rowType{ Name: "foo1", } // write two rows w.Write(sample) sample.Name = "foo2" w.Write(sample) w.Close() // create reader r := parquet.NewReader(bytes.NewReader(buf.Bytes())) // read second row r.SeekToRow(1) row := new(rowType) err := r.Read(row) if err != nil { t.Fatalf("reading row: %v", err) } // fmt.Println(&sample, row) if *row != sample { t.Fatalf("read != write") } } func TestSeekToRowReadAll(t *testing.T) { type rowType struct { Name 
utf8string `parquet:",dict"` } // write samples to in-memory buffer buf := new(bytes.Buffer) schema := parquet.SchemaOf(new(rowType)) w := parquet.NewWriter(buf, schema) sample := rowType{ Name: "foo1", } // write two rows w.Write(sample) sample.Name = "foo2" w.Write(sample) w.Close() // create reader r := parquet.NewReader(bytes.NewReader(buf.Bytes())) // read first row r.SeekToRow(0) row := new(rowType) err := r.Read(row) if err != nil { t.Fatalf("reading row: %v", err) } // read second row r.SeekToRow(1) row = new(rowType) err = r.Read(row) if err != nil { t.Fatalf("reading row: %v", err) } // fmt.Println(&sample, row) if *row != sample { t.Fatalf("read != write") } } func TestSeekToRowDictReadSecond(t *testing.T) { type rowType struct { Name utf8string `parquet:",dict"` } // write samples to in-memory buffer buf := new(bytes.Buffer) schema := parquet.SchemaOf(new(rowType)) w := parquet.NewWriter(buf, schema) sample := rowType{ Name: "foo1", } // write two rows w.Write(sample) sample.Name = "foo2" w.Write(sample) w.Close() // create reader r := parquet.NewReader(bytes.NewReader(buf.Bytes())) // read second row r.SeekToRow(1) row := new(rowType) err := r.Read(row) if err != nil { t.Fatalf("reading row: %v", err) } // fmt.Println(&sample, row) if *row != sample { t.Fatalf("read != write") } } func TestSeekToRowDictReadMultiplePages(t *testing.T) { type rowType struct { Name utf8string `parquet:",dict"` } // write samples to in-memory buffer buf := new(bytes.Buffer) schema := parquet.SchemaOf(new(rowType)) w := parquet.NewWriter(buf, schema, &parquet.WriterConfig{ PageBufferSize: 10, }) sample := rowType{ Name: "foo1", } // write enough rows to spill over a single page for i := 0; i < 10; i++ { w.Write(sample) } sample.Name = "foo2" w.Write(sample) w.Close() // create reader r := parquet.NewReader(bytes.NewReader(buf.Bytes())) // read 11th row r.SeekToRow(10) row := new(rowType) err := r.Read(row) if err != nil { t.Fatalf("reading row: %v", err) } if *row != sample 
{
		t.Fatalf("read != write")
	}
}

================================================
FILE: row.go
================================================
package parquet

import (
	"errors"
	"fmt"
	"io"
	"reflect"
)

const (
	// defaultRowBufferSize is the number of rows buffered by copyRows when the
	// caller does not provide its own buffer.
	defaultRowBufferSize = 42
)

// Row represents a parquet row as a slice of values.
//
// Each value should embed a column index, repetition level, and definition
// level allowing the program to determine how to reconstruct the original
// object from the row.
type Row []Value

// MakeRow constructs a Row from a list of column values.
//
// The function panics if the column indexes of values in each column do not
// match their position in the argument list.
func MakeRow(columns ...[]Value) Row {
	return AppendRow(nil, columns...)
}

// AppendRow appends to row the given list of column values.
//
// AppendRow can be used to construct a Row value from columns, while retaining
// the underlying memory buffer to avoid reallocation; for example:
//
// The function panics if the column indexes of values in each column do not
// match their position in the argument list.
func AppendRow(row Row, columns ...[]Value) Row {
	numValues := 0
	for expectedColumnIndex, column := range columns {
		numValues += len(column)
		for _, value := range column {
			// Values store their column index as the bitwise complement
			// (^i), hence the comparison against ^int16(expectedColumnIndex).
			if value.columnIndex != ^int16(expectedColumnIndex) {
				panic(fmt.Sprintf("value of column %d has column index %d", expectedColumnIndex, value.Column()))
			}
		}
	}
	// Grow the backing array once, up front, if the remaining capacity cannot
	// hold all the values being appended.
	if capacity := cap(row) - len(row); capacity < numValues {
		row = append(make(Row, 0, len(row)+numValues), row...)
	}
	return appendRow(row, columns)
}

// appendRow appends every column's values to row without validation; callers
// are expected to have verified column indexes already.
func appendRow(row Row, columns [][]Value) Row {
	for _, column := range columns {
		row = append(row, column...)
	}
	return row
}

// Clone creates a copy of the row which shares no pointers.
//
// This method is useful to capture rows after a call to RowReader.ReadRows when
// values need to be retained before the next call to ReadRows or after the lifespan
// of the reader.
func (row Row) Clone() Row {
	clone := make(Row, len(row))
	for i := range row {
		clone[i] = row[i].Clone()
	}
	return clone
}

// Equal returns true if row and other contain the same sequence of values.
func (row Row) Equal(other Row) bool {
	if len(row) != len(other) {
		return false
	}
	for i := range row {
		// Values must match in content as well as in column index and
		// repetition/definition levels to be considered equal.
		if !Equal(row[i], other[i]) {
			return false
		}
		if row[i].repetitionLevel != other[i].repetitionLevel {
			return false
		}
		if row[i].definitionLevel != other[i].definitionLevel {
			return false
		}
		if row[i].columnIndex != other[i].columnIndex {
			return false
		}
	}
	return true
}

// Range calls f for each column of row.
func (row Row) Range(f func(columnIndex int, columnValues []Value) bool) {
	columnIndex := 0
	for i := 0; i < len(row); {
		// Advance j to the end of the run of values belonging to the current
		// column (column indexes are stored as their bitwise complement).
		j := i + 1
		for j < len(row) && row[j].columnIndex == ^int16(columnIndex) {
			j++
		}
		// The three-index slice prevents f from appending into the row.
		if !f(columnIndex, row[i:j:j]) {
			break
		}
		columnIndex++
		i = j
	}
}

// RowSeeker is an interface implemented by readers of parquet rows which can be
// positioned at a specific row index.
type RowSeeker interface {
	// Positions the stream on the given row index.
	//
	// Some implementations of the interface may only allow seeking forward.
	//
	// The method returns io.ErrClosedPipe if the stream had already been closed.
	SeekToRow(int64) error
}

// RowReader reads a sequence of parquet rows.
type RowReader interface {
	// ReadRows reads rows from the reader, returning the number of rows read
	// into the buffer, and any error that occurred. Note that the rows read
	// into the buffer are not safe for reuse after a subsequent call to
	// ReadRows. Callers that want to reuse rows must copy the rows using Clone.
	//
	// When all rows have been read, the reader returns io.EOF to indicate the
	// end of the sequence. It is valid for the reader to return both a non-zero
	// number of rows and a non-nil error (including io.EOF).
	//
	// The buffer of rows passed as argument will be used to store values of
	// each row read from the reader. If the rows are not nil, the backing array
	// of the slices will be used as an optimization to avoid re-allocating new
	// arrays.
	//
	// The application is expected to handle the case where ReadRows returns
	// less rows than requested and no error, by looking at the first returned
	// value from ReadRows, which is the number of rows that were read.
	ReadRows([]Row) (int, error)
}

// RowReaderFrom reads parquet rows from reader.
type RowReaderFrom interface {
	ReadRowsFrom(RowReader) (int64, error)
}

// RowReaderWithSchema is an extension of the RowReader interface which
// advertises the schema of rows returned by ReadRow calls.
type RowReaderWithSchema interface {
	RowReader
	Schema() *Schema
}

// RowReadSeeker is an interface implemented by row readers which support
// seeking to arbitrary row positions.
type RowReadSeeker interface {
	RowReader
	RowSeeker
}

// RowWriter writes parquet rows to an underlying medium.
type RowWriter interface {
	// Writes rows to the writer, returning the number of rows written and any
	// error that occurred.
	//
	// Because columnar operations operate on independent columns of values,
	// writes of rows may not be atomic operations, and could result in some
	// rows being partially written. The method returns the number of rows that
	// were successfully written, but if an error occurs, values of the row(s)
	// that failed to be written may have been partially committed to their
	// columns. For that reason, applications should consider a write error as
	// fatal and assume that they need to discard the state, they cannot retry
	// the write nor recover the underlying file.
	WriteRows([]Row) (int, error)
}

// RowWriterTo writes parquet rows to a writer.
type RowWriterTo interface {
	WriteRowsTo(RowWriter) (int64, error)
}

// RowWriterWithSchema is an extension of the RowWriter interface which
// advertises the schema of rows expected to be passed to WriteRow calls.
type RowWriterWithSchema interface {
	RowWriter
	Schema() *Schema
}

// RowReaderFunc is a function type implementing the RowReader interface.
type RowReaderFunc func([]Row) (int, error)

func (f RowReaderFunc) ReadRows(rows []Row) (int, error) { return f(rows) }

// RowWriterFunc is a function type implementing the RowWriter interface.
type RowWriterFunc func([]Row) (int, error)

func (f RowWriterFunc) WriteRows(rows []Row) (int, error) { return f(rows) }

// MultiRowWriter constructs a RowWriter which dispatches writes to all the
// writers passed as arguments.
//
// When writing rows, if any of the writers returns an error, the operation is
// aborted and the error returned. If one of the writers did not error, but did
// not write all the rows, the operation is aborted and io.ErrShortWrite is
// returned.
//
// Rows are written sequentially to each writer in the order they are given to
// this function.
func MultiRowWriter(writers ...RowWriter) RowWriter {
	// Copy the variadic slice so later mutations by the caller cannot affect
	// the writer.
	m := &multiRowWriter{writers: make([]RowWriter, len(writers))}
	copy(m.writers, writers)
	return m
}

type multiRowWriter struct{ writers []RowWriter }

func (m *multiRowWriter) WriteRows(rows []Row) (int, error) {
	for _, w := range m.writers {
		n, err := w.WriteRows(rows)
		if err != nil {
			return n, err
		}
		if n != len(rows) {
			return n, io.ErrShortWrite
		}
	}
	return len(rows), nil
}

// forwardRowSeeker adapts a plain RowReader into a RowReadSeeker that only
// supports forward seeks: the target row index is recorded and the skipped
// rows are discarded during the next ReadRows calls.
type forwardRowSeeker struct {
	rows  RowReader
	seek  int64 // target row index requested by SeekToRow
	index int64 // number of rows consumed from the underlying reader while skipping
}

func (r *forwardRowSeeker) ReadRows(rows []Row) (int, error) {
	for {
		n, err := r.rows.ReadRows(rows)

		if n > 0 && r.index < r.seek {
			skip := r.seek - r.index
			r.index += int64(n)
			if skip >= int64(n) {
				// The entire batch is before the seek target; discard it and
				// read again.
				continue
			}

			// Shift the rows at/after the seek target to the front of the
			// buffer, reusing each destination row's backing array.
			for i, j := 0, int(skip); j < n; i++ {
				rows[i] = append(rows[i][:0], rows[j]...)
			}

			n -= int(skip)
		}
		// NOTE(review): r.index is only advanced while skipping toward r.seek;
		// reads after the target is reached do not update it, so a subsequent
		// SeekToRow compares against a stale position — confirm whether
		// callers ever seek twice on the same forwardRowSeeker.

		return n, err
	}
}

func (r *forwardRowSeeker) SeekToRow(rowIndex int64) error {
	if rowIndex >= r.index {
		r.seek = rowIndex
		return nil
	}
	return fmt.Errorf(
		"SeekToRow: %T does not implement parquet.RowSeeker: cannot seek backward from row %d to %d",
		r.rows, r.index, rowIndex,
	)
}

// CopyRows copies rows from src to dst.
//
// The underlying types of src and dst are tested to determine if they expose
// information about the schema of rows that are read and expected to be
// written. If the schema information are available but do not match, the
// function will attempt to automatically convert the rows from the source
// schema to the destination.
//
// As an optimization, the src argument may implement RowWriterTo to bypass
// the default row copy logic and provide its own. The dst argument may also
// implement RowReaderFrom for the same purpose.
//
// The function returns the number of rows written, or any error encountered
// other than io.EOF.
func CopyRows(dst RowWriter, src RowReader) (int64, error) {
	return copyRows(dst, src, nil)
}

func copyRows(dst RowWriter, src RowReader, buf []Row) (written int64, err error) {
	targetSchema := targetSchemaOf(dst)
	sourceSchema := sourceSchemaOf(src)

	if targetSchema != nil && sourceSchema != nil {
		if !nodesAreEqual(targetSchema, sourceSchema) {
			conv, err := Convert(targetSchema, sourceSchema)
			if err != nil {
				return 0, err
			}
			// The conversion effectively disables a potential optimization
			// if the source reader implemented RowWriterTo. It is a trade off
			// we are making to optimize for safety rather than performance.
			//
			// Entering this code path should not be the common case tho, it is
			// most often used when parquet schemas are evolving, but we expect
			// that the majority of files of an application to be sharing a
			// common schema.
			src = ConvertRowReader(src, conv)
		}
	}

	if wt, ok := src.(RowWriterTo); ok {
		return wt.WriteRowsTo(dst)
	}

	if rf, ok := dst.(RowReaderFrom); ok {
		return rf.ReadRowsFrom(src)
	}

	if len(buf) == 0 {
		buf = make([]Row, defaultRowBufferSize)
	}
	// Release references held by the buffer once the copy completes so the
	// copied values do not outlive this call.
	defer clearRows(buf)

	for {
		rn, err := src.ReadRows(buf)

		if rn > 0 {
			wn, err := dst.WriteRows(buf[:rn])
			if err != nil {
				return written, err
			}
			written += int64(wn)
		}

		if err != nil {
			if errors.Is(err, io.EOF) {
				err = nil
			}
			return written, err
		}

		if rn == 0 {
			return written, io.ErrNoProgress
		}
	}
}

// makeRows allocates n empty rows sharing a single backing array of values;
// the three-index slice gives each row length 0 and capacity 1 at offset i.
func makeRows(n int) []Row {
	buf := make([]Value, n)
	row := make([]Row, n)
	for i := range row {
		row[i] = buf[i : i : i+1]
	}
	return row
}

// clearRows zeroes the values of every row and truncates each row to length
// zero while keeping its capacity for reuse.
func clearRows(rows []Row) {
	for i, values := range rows {
		clearValues(values)
		rows[i] = values[:0]
	}
}

// sourceSchemaOf returns the schema advertised by r, or nil if r does not
// implement RowReaderWithSchema.
func sourceSchemaOf(r RowReader) *Schema {
	if rrs, ok := r.(RowReaderWithSchema); ok {
		return rrs.Schema()
	}
	return nil
}

// targetSchemaOf returns the schema advertised by w, or nil if w does not
// implement RowWriterWithSchema.
func targetSchemaOf(w RowWriter) *Schema {
	if rws, ok := w.(RowWriterWithSchema); ok {
		return rws.Schema()
	}
	return nil
}

// =============================================================================
// Functions returning closures are marked with "go:noinline" below to prevent
// losing naming information of the closure in stack traces.
//
// Because some of the functions are very short (simply return a closure), the
// compiler inlines when at their call site, which result in the closure being
// named something like parquet.deconstructFuncOf.func2 instead of the original
// parquet.deconstructFuncOfLeaf.func1; the latter being much more meaningful
// when reading CPU or memory profiles.
// =============================================================================

// levels tracks the current repetition/definition state while walking a row's
// schema tree during deconstruction or reconstruction.
type levels struct {
	repetitionDepth byte
	repetitionLevel byte
	definitionLevel byte
}

// deconstructFunc accepts a row, the current levels, the value to deserialize
// the current column onto, and returns the row minus the deserialized value(s)
// It recurses until it hits a leaf node, then deserializes that value
// individually as the base case.
type deconstructFunc func([][]Value, levels, reflect.Value)

// deconstructFuncOf dispatches on the node's shape (optional, repeated, list,
// map, or required) and returns the next free column index along with the
// deconstruction function for the node.
func deconstructFuncOf(columnIndex int16, node Node) (int16, deconstructFunc) {
	switch {
	case node.Optional():
		return deconstructFuncOfOptional(columnIndex, node)
	case node.Repeated():
		return deconstructFuncOfRepeated(columnIndex, node)
	case isList(node):
		return deconstructFuncOfList(columnIndex, node)
	case isMap(node):
		return deconstructFuncOfMap(columnIndex, node)
	default:
		return deconstructFuncOfRequired(columnIndex, node)
	}
}

//go:noinline
func deconstructFuncOfOptional(columnIndex int16, node Node) (int16, deconstructFunc) {
	columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node))
	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
		if value.IsValid() {
			if value.IsZero() {
				// A zero value is treated as absent (null).
				value = reflect.Value{}
			} else {
				if value.Kind() == reflect.Ptr {
					value = value.Elem()
				}
				// The value is present at this optional level.
				levels.definitionLevel++
			}
		}
		deconstruct(columns, levels, value)
	}
}

//go:noinline
func deconstructFuncOfRepeated(columnIndex int16, node Node) (int16, deconstructFunc) {
	columnIndex, deconstruct := deconstructFuncOf(columnIndex, Required(node))
	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
		if !value.IsValid() || value.Len() == 0 {
			// Missing or empty list: record a null entry at the current levels.
			deconstruct(columns, levels, reflect.Value{})
			return
		}

		levels.repetitionDepth++
		levels.definitionLevel++

		for i, n := 0, value.Len(); i < n; i++ {
			deconstruct(columns, levels, value.Index(i))
			// Only the first element keeps the parent's repetition level;
			// subsequent elements repeat at this depth.
			levels.repetitionLevel = levels.repetitionDepth
		}
	}
}

func deconstructFuncOfRequired(columnIndex int16, node Node) (int16, deconstructFunc) {
	switch {
	case node.Leaf():
		return deconstructFuncOfLeaf(columnIndex, node)
	default:
		return deconstructFuncOfGroup(columnIndex, node)
	}
}

func deconstructFuncOfList(columnIndex int16, node Node) (int16, deconstructFunc) {
	return deconstructFuncOf(columnIndex, Repeated(listElementOf(node)))
}

//go:noinline
func deconstructFuncOfMap(columnIndex int16, node Node) (int16, deconstructFunc) {
	keyValue := mapKeyValueOf(node)
	keyValueType := keyValue.GoType()
	keyValueElem := keyValueType.Elem()
	keyType := keyValueElem.Field(0).Type
	valueType := keyValueElem.Field(1).Type
	nextColumnIndex, deconstruct := deconstructFuncOf(columnIndex, schemaOf(keyValueElem))
	return nextColumnIndex, func(columns [][]Value, levels levels, mapValue reflect.Value) {
		if !mapValue.IsValid() || mapValue.Len() == 0 {
			deconstruct(columns, levels, reflect.Value{})
			return
		}

		levels.repetitionDepth++
		levels.definitionLevel++

		// Reuse a single key/value struct for every entry of the map.
		elem := reflect.New(keyValueElem).Elem()
		k := elem.Field(0)
		v := elem.Field(1)

		for _, key := range mapValue.MapKeys() {
			k.Set(key.Convert(keyType))
			v.Set(mapValue.MapIndex(key).Convert(valueType))
			deconstruct(columns, levels, elem)
			levels.repetitionLevel = levels.repetitionDepth
		}
	}
}

//go:noinline
func deconstructFuncOfGroup(columnIndex int16, node Node) (int16, deconstructFunc) {
	fields := node.Fields()
	funcs := make([]deconstructFunc, len(fields))
	for i, field := range fields {
		columnIndex, funcs[i] = deconstructFuncOf(columnIndex, field)
	}
	return columnIndex, func(columns [][]Value, levels levels, value reflect.Value) {
		if value.IsValid() {
			for i, f := range funcs {
				f(columns, levels, fields[i].Value(value))
			}
		} else {
			// Missing group: propagate the invalid value so every leaf below
			// records a null at the current levels.
			for _, f := range funcs {
				f(columns, levels, value)
			}
		}
	}
}

//go:noinline
func deconstructFuncOfLeaf(columnIndex int16, node Node) (int16, deconstructFunc) {
	if columnIndex > MaxColumnIndex {
		panic("row cannot be deconstructed because it has more than 127 columns")
	}
	typ := node.Type()
	kind := typ.Kind()
	lt := typ.LogicalType()
	// Values carry the one's complement of their column index.
	valueColumnIndex := ^columnIndex
	return columnIndex + 1, func(columns [][]Value, levels levels, value reflect.Value) {
		v := Value{}

		if value.IsValid() {
			v = makeValue(kind, lt, value)
		}

		v.repetitionLevel = levels.repetitionLevel
		v.definitionLevel = levels.definitionLevel
		v.columnIndex = valueColumnIndex

		columns[columnIndex] = append(columns[columnIndex], v)
	}
}

// "reconstructX" turns a Go value into a Go representation of a Parquet series
// of values

type reconstructFunc func(reflect.Value, levels, [][]Value) error

// reconstructFuncOf mirrors deconstructFuncOf for the reverse direction:
// rebuilding Go values from columns of parquet values.
func reconstructFuncOf(columnIndex int16, node Node) (int16, reconstructFunc) {
	switch {
	case node.Optional():
		return reconstructFuncOfOptional(columnIndex, node)
	case node.Repeated():
		return reconstructFuncOfRepeated(columnIndex, node)
	case isList(node):
		return reconstructFuncOfList(columnIndex, node)
	case isMap(node):
		return reconstructFuncOfMap(columnIndex, node)
	default:
		return reconstructFuncOfRequired(columnIndex, node)
	}
}

//go:noinline
func reconstructFuncOfOptional(columnIndex int16, node Node) (int16, reconstructFunc) {
	// We convert the optional func to required so that we eventually reach the
	// leaf base-case. We're still using the heuristics of optional in the
	// returned closure (see levels.definitionLevel++), but we don't actually do
	// deserialization here, that happens in the leaf function, hence this line.
	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node))
	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		levels.definitionLevel++

		if columns[0][0].definitionLevel < levels.definitionLevel {
			// Null at this level: reset the destination to its zero value.
			value.Set(reflect.Zero(value.Type()))
			return nil
		}

		if value.Kind() == reflect.Ptr {
			if value.IsNil() {
				value.Set(reflect.New(value.Type().Elem()))
			}
			value = value.Elem()
		}

		return reconstruct(value, levels, columns)
	}
}

// setMakeSlice replaces the value held by v with a freshly made slice of
// length and capacity n, and returns that slice value. Interface destinations
// receive a []interface{}.
func setMakeSlice(v reflect.Value, n int) reflect.Value {
	t := v.Type()
	if t.Kind() == reflect.Interface {
		t = reflect.TypeOf(([]interface{})(nil))
	}
	s := reflect.MakeSlice(t, n, n)
	v.Set(s)
	return s
}

//go:noinline
func reconstructFuncOfRepeated(columnIndex int16, node Node) (int16, reconstructFunc) {
	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, Required(node))
	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		levels.repetitionDepth++
		levels.definitionLevel++

		if columns[0][0].definitionLevel < levels.definitionLevel {
			setMakeSlice(value, 0)
			return nil
		}

		values := make([][]Value, len(columns))
		column := columns[0]
		n := 0

		for i, column := range columns {
			values[i] = column[0:0:len(column)]
		}

		// Count the elements of the repeated group from the repetition levels
		// of the first column.
		for i := 0; i < len(column); {
			i++
			n++

			for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth {
				i++
			}
		}

		value = setMakeSlice(value, n)

		for i := 0; i < n; i++ {
			// Narrow each column to the values belonging to element i.
			for j, column := range values {
				column = column[:cap(column)]
				if len(column) == 0 {
					continue
				}

				k := 1
				for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth {
					k++
				}

				values[j] = column[:k]
			}

			if err := reconstruct(value.Index(i), levels, values); err != nil {
				return err
			}

			// Advance each column past the consumed values.
			for j, column := range values {
				values[j] = column[len(column):len(column):cap(column)]
			}

			levels.repetitionLevel = levels.repetitionDepth
		}

		return nil
	}
}

func reconstructFuncOfRequired(columnIndex int16, node Node) (int16, reconstructFunc) {
	switch {
	case node.Leaf():
		return reconstructFuncOfLeaf(columnIndex, node)
	default:
		return reconstructFuncOfGroup(columnIndex, node)
	}
}

func reconstructFuncOfList(columnIndex int16, node Node) (int16, reconstructFunc) {
	return reconstructFuncOf(columnIndex, Repeated(listElementOf(node)))
}

//go:noinline
func reconstructFuncOfMap(columnIndex int16, node Node) (int16, reconstructFunc) {
	keyValue := mapKeyValueOf(node)
	keyValueType := keyValue.GoType()
	keyValueElem := keyValueType.Elem()
	keyValueZero := reflect.Zero(keyValueElem)
	nextColumnIndex, reconstruct := reconstructFuncOf(columnIndex, schemaOf(keyValueElem))
	return nextColumnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		levels.repetitionDepth++
		levels.definitionLevel++

		if columns[0][0].definitionLevel < levels.definitionLevel {
			value.Set(reflect.MakeMap(value.Type()))
			return nil
		}

		values := make([][]Value, len(columns))
		column := columns[0]
		t := value.Type()
		k := t.Key()
		v := t.Elem()
		n := 0

		for i, column := range columns {
			values[i] = column[0:0:len(column)]
		}

		// Count the map entries from the repetition levels of the first column.
		for i := 0; i < len(column); {
			i++
			n++

			for i < len(column) && column[i].repetitionLevel > levels.repetitionDepth {
				i++
			}
		}

		if value.IsNil() {
			value.Set(reflect.MakeMapWithSize(t, n))
		}

		// Reuse a single key/value struct for every entry of the map.
		elem := reflect.New(keyValueElem).Elem()
		for i := 0; i < n; i++ {
			for j, column := range values {
				column = column[:cap(column)]

				k := 1
				for k < len(column) && column[k].repetitionLevel > levels.repetitionDepth {
					k++
				}

				values[j] = column[:k]
			}

			if err := reconstruct(elem, levels, values); err != nil {
				return err
			}

			for j, column := range values {
				values[j] = column[len(column):len(column):cap(column)]
			}

			value.SetMapIndex(elem.Field(0).Convert(k), elem.Field(1).Convert(v))
			elem.Set(keyValueZero)
			levels.repetitionLevel = levels.repetitionDepth
		}

		return nil
	}
}

//go:noinline
func reconstructFuncOfGroup(columnIndex int16, node Node) (int16, reconstructFunc) {
	fields := node.Fields()
	funcs := make([]reconstructFunc, len(fields))
	// columnOffsets[i] is the offset (relative to the group's first column) of
	// the column immediately after field i's columns.
	columnOffsets := make([]int16, len(fields))
	firstColumnIndex := columnIndex

	for i, field := range fields {
		columnIndex, funcs[i] = reconstructFuncOf(columnIndex, field)
		columnOffsets[i] = columnIndex - firstColumnIndex
	}

	return columnIndex, func(value reflect.Value, levels levels, columns [][]Value) error {
		if value.Kind() == reflect.Interface {
			value.Set(reflect.MakeMap(reflect.TypeOf((map[string]interface{})(nil))))
			value = value.Elem()
		}

		if value.Kind() == reflect.Map {
			elemType := value.Type().Elem()
			name := reflect.New(reflect.TypeOf("")).Elem()
			elem := reflect.New(elemType).Elem()
			zero := reflect.Zero(elemType)

			if value.Len() > 0 {
				value.Set(reflect.MakeMap(value.Type()))
			}

			off := int16(0)
			for i, f := range funcs {
				name.SetString(fields[i].Name())
				end := columnOffsets[i]
				err := f(elem, levels, columns[off:end:end])
				if err != nil {
					return fmt.Errorf("%s → %w", name, err)
				}
				off = end
				value.SetMapIndex(name, elem)
				elem.Set(zero)
			}
		} else {
			off := int16(0)
			for i, f := range funcs {
				end := columnOffsets[i]
				err := f(fields[i].Value(value), levels, columns[off:end:end])
				if err != nil {
					return fmt.Errorf("%s → %w", fields[i].Name(), err)
				}
				off = end
			}
		}

		return nil
	}
}

//go:noinline
func reconstructFuncOfLeaf(columnIndex int16, node Node) (int16, reconstructFunc) {
	typ := node.Type()
	return columnIndex + 1, func(value reflect.Value, _ levels, columns [][]Value) error {
		column := columns[0]
		if len(column) == 0 {
			return fmt.Errorf("no values found in parquet row for column %d", columnIndex)
		}
		return typ.AssignValue(value, column[0])
	}
}

================================================
FILE: row_buffer.go
================================================
//go:build go1.18

package parquet

import (
	"io"
	"sort"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
)

// RowBuffer is an implementation of the RowGroup interface which stores parquet
// rows in memory.
//
// Unlike GenericBuffer which uses a column layout to store values in memory
// buffers, RowBuffer uses a row layout.
// The use of row layout provides greater
// efficiency when sorting the buffer, which is the primary use case for the
// RowBuffer type. Applications which intend to sort rows prior to writing them
// to a parquet file will often see lower CPU utilization from using a RowBuffer
// than a GenericBuffer.
//
// RowBuffer values are not safe to use concurrently from multiple goroutines.
type RowBuffer[T any] struct {
	alloc   rowAllocator
	schema  *Schema
	sorting []SortingColumn
	rows    []Row
	values  []Value
	compare func(Row, Row) int
}

// NewRowBuffer constructs a new row buffer.
func NewRowBuffer[T any](options ...RowGroupOption) *RowBuffer[T] {
	config := DefaultRowGroupConfig()
	config.Apply(options...)
	if err := config.Validate(); err != nil {
		panic(err)
	}

	// Derive the schema from the concrete type parameter when none was given
	// through the options.
	t := typeOf[T]()
	if config.Schema == nil && t != nil {
		config.Schema = schemaOf(dereference(t))
	}

	if config.Schema == nil {
		panic("row buffer must be instantiated with schema or concrete type.")
	}

	return &RowBuffer[T]{
		schema:  config.Schema,
		sorting: config.Sorting.SortingColumns,
		compare: config.Schema.Comparator(config.Sorting.SortingColumns...),
	}
}

// Reset clears the content of the buffer without releasing its memory.
func (buf *RowBuffer[T]) Reset() {
	// Zero the retained rows and values so the garbage collector can reclaim
	// the memory they reference, then truncate keeping capacity.
	for i := range buf.rows {
		buf.rows[i] = nil
	}
	for i := range buf.values {
		buf.values[i] = Value{}
	}
	buf.rows = buf.rows[:0]
	buf.values = buf.values[:0]
	buf.alloc.reset()
}

// NumRows returns the number of rows currently written to the buffer.
func (buf *RowBuffer[T]) NumRows() int64 { return int64(len(buf.rows)) }

// ColumnChunks returns a view of the buffer's columns.
//
// Note that reading columns of a RowBuffer will be less efficient than reading
// columns of a GenericBuffer since the latter uses a column layout. This method
// is mainly exposed to satisfy the RowGroup interface, applications which need
// compute-efficient column scans on in-memory buffers should likely use a
// GenericBuffer instead.
//
// The returned column chunks are snapshots at the time the method is called,
// they remain valid until the next call to Reset on the buffer.
func (buf *RowBuffer[T]) ColumnChunks() []ColumnChunk {
	columns := buf.schema.Columns()
	chunks := make([]rowBufferColumnChunk, len(columns))

	for i, column := range columns {
		leafColumn, _ := buf.schema.Lookup(column...)
		chunks[i] = rowBufferColumnChunk{
			page: rowBufferPage{
				rows:               buf.rows,
				typ:                leafColumn.Node.Type(),
				column:             leafColumn.ColumnIndex,
				maxRepetitionLevel: byte(leafColumn.MaxRepetitionLevel),
				maxDefinitionLevel: byte(leafColumn.MaxDefinitionLevel),
			},
		}
	}

	columnChunks := make([]ColumnChunk, len(chunks))
	for i := range chunks {
		columnChunks[i] = &chunks[i]
	}
	return columnChunks
}

// SortingColumns returns the list of columns that rows are expected to be
// sorted by.
//
// The list of sorting columns is configured when the buffer is created and used
// when it is sorted.
//
// Note that unless the buffer is explicitly sorted, there are no guarantees
// that the rows it contains will be in the order specified by the sorting
// columns.
func (buf *RowBuffer[T]) SortingColumns() []SortingColumn { return buf.sorting }

// Schema returns the schema of rows in the buffer.
func (buf *RowBuffer[T]) Schema() *Schema { return buf.schema }

// Len returns the number of rows in the buffer.
//
// The method contributes to satisfying sort.Interface.
func (buf *RowBuffer[T]) Len() int { return len(buf.rows) }

// Less compares the rows at index i and j according to the sorting columns
// configured on the buffer.
//
// The method contributes to satisfying sort.Interface.
func (buf *RowBuffer[T]) Less(i, j int) bool { return buf.compare(buf.rows[i], buf.rows[j]) < 0 }

// Swap exchanges the rows at index i and j in the buffer.
//
// The method contributes to satisfying sort.Interface.
func (buf *RowBuffer[T]) Swap(i, j int) { buf.rows[i], buf.rows[j] = buf.rows[j], buf.rows[i] } // Rows returns a Rows instance exposing rows stored in the buffer. // // The rows returned are a snapshot at the time the method is called. // The returned rows and values read from it remain valid until the next call // to Reset on the buffer. func (buf *RowBuffer[T]) Rows() Rows { return &rowBufferRows{rows: buf.rows, schema: buf.schema} } // Write writes rows to the buffer, returning the number of rows written. func (buf *RowBuffer[T]) Write(rows []T) (int, error) { for i := range rows { off := len(buf.values) buf.values = buf.schema.Deconstruct(buf.values, &rows[i]) end := len(buf.values) row := buf.values[off:end:end] buf.alloc.capture(row) buf.rows = append(buf.rows, row) } return len(rows), nil } // WriteRows writes parquet rows to the buffer, returing the number of rows // written. func (buf *RowBuffer[T]) WriteRows(rows []Row) (int, error) { for i := range rows { off := len(buf.values) buf.values = append(buf.values, rows[i]...) 
end := len(buf.values) row := buf.values[off:end:end] buf.alloc.capture(row) buf.rows = append(buf.rows, row) } return len(rows), nil } type rowBufferColumnChunk struct{ page rowBufferPage } func (c *rowBufferColumnChunk) Type() Type { return c.page.Type() } func (c *rowBufferColumnChunk) Column() int { return c.page.Column() } func (c *rowBufferColumnChunk) Pages() Pages { return onePage(&c.page) } func (c *rowBufferColumnChunk) ColumnIndex() ColumnIndex { return nil } func (c *rowBufferColumnChunk) OffsetIndex() OffsetIndex { return nil } func (c *rowBufferColumnChunk) BloomFilter() BloomFilter { return nil } func (c *rowBufferColumnChunk) NumValues() int64 { return c.page.NumValues() } type rowBufferPage struct { rows []Row typ Type column int maxRepetitionLevel byte maxDefinitionLevel byte } func (p *rowBufferPage) Type() Type { return p.typ } func (p *rowBufferPage) Column() int { return p.column } func (p *rowBufferPage) Dictionary() Dictionary { return nil } func (p *rowBufferPage) NumRows() int64 { return int64(len(p.rows)) } func (p *rowBufferPage) NumValues() int64 { numValues := int64(0) p.scan(func(value Value) { if !value.isNull() { numValues++ } }) return numValues } func (p *rowBufferPage) NumNulls() int64 { numNulls := int64(0) p.scan(func(value Value) { if value.isNull() { numNulls++ } }) return numNulls } func (p *rowBufferPage) Bounds() (min, max Value, ok bool) { p.scan(func(value Value) { if !value.IsNull() { switch { case !ok: min, max, ok = value, value, true case p.typ.Compare(value, min) < 0: min = value case p.typ.Compare(value, max) > 0: max = value } } }) return min, max, ok } func (p *rowBufferPage) Size() int64 { return 0 } func (p *rowBufferPage) Values() ValueReader { return &rowBufferPageValueReader{ page: p, columnIndex: ^int16(p.column), } } func (p *rowBufferPage) Clone() Page { rows := make([]Row, len(p.rows)) for i := range rows { rows[i] = p.rows[i].Clone() } return &rowBufferPage{ rows: rows, typ: p.typ, column: p.column, } } 
func (p *rowBufferPage) Slice(i, j int64) Page { return &rowBufferPage{ rows: p.rows[i:j], typ: p.typ, column: p.column, } } func (p *rowBufferPage) RepetitionLevels() (repetitionLevels []byte) { if p.maxRepetitionLevel != 0 { repetitionLevels = make([]byte, 0, len(p.rows)) p.scan(func(value Value) { repetitionLevels = append(repetitionLevels, value.repetitionLevel) }) } return repetitionLevels } func (p *rowBufferPage) DefinitionLevels() (definitionLevels []byte) { if p.maxDefinitionLevel != 0 { definitionLevels = make([]byte, 0, len(p.rows)) p.scan(func(value Value) { definitionLevels = append(definitionLevels, value.definitionLevel) }) } return definitionLevels } func (p *rowBufferPage) Data() encoding.Values { switch p.typ.Kind() { case Boolean: values := make([]byte, (len(p.rows)+7)/8) numValues := 0 p.scanNonNull(func(value Value) { if value.boolean() { i := uint(numValues) / 8 j := uint(numValues) % 8 values[i] |= 1 << j } numValues++ }) return encoding.BooleanValues(values[:(numValues+7)/8]) case Int32: values := make([]int32, 0, len(p.rows)) p.scanNonNull(func(value Value) { values = append(values, value.int32()) }) return encoding.Int32Values(values) case Int64: values := make([]int64, 0, len(p.rows)) p.scanNonNull(func(value Value) { values = append(values, value.int64()) }) return encoding.Int64Values(values) case Int96: values := make([]deprecated.Int96, 0, len(p.rows)) p.scanNonNull(func(value Value) { values = append(values, value.int96()) }) return encoding.Int96Values(values) case Float: values := make([]float32, 0, len(p.rows)) p.scanNonNull(func(value Value) { values = append(values, value.float()) }) return encoding.FloatValues(values) case Double: values := make([]float64, 0, len(p.rows)) p.scanNonNull(func(value Value) { values = append(values, value.double()) }) return encoding.DoubleValues(values) case ByteArray: values := make([]byte, 0, p.typ.EstimateSize(len(p.rows))) offsets := make([]uint32, 0, len(p.rows)) p.scanNonNull(func(value 
Value) { offsets = append(offsets, uint32(len(values))) values = append(values, value.byteArray()...) }) offsets = append(offsets, uint32(len(values))) return encoding.ByteArrayValues(values, offsets) case FixedLenByteArray: length := p.typ.Length() values := make([]byte, 0, length*len(p.rows)) p.scanNonNull(func(value Value) { values = append(values, value.byteArray()...) }) return encoding.FixedLenByteArrayValues(values, length) default: return encoding.Values{} } } func (p *rowBufferPage) scan(f func(Value)) { columnIndex := ^int16(p.column) for _, row := range p.rows { for _, value := range row { if value.columnIndex == columnIndex { f(value) } } } } func (p *rowBufferPage) scanNonNull(f func(Value)) { p.scan(func(value Value) { if !value.isNull() { f(value) } }) } type rowBufferPageValueReader struct { page *rowBufferPage rowIndex int valueIndex int columnIndex int16 } func (r *rowBufferPageValueReader) ReadValues(values []Value) (n int, err error) { for n < len(values) && r.rowIndex < len(r.page.rows) { for n < len(values) && r.valueIndex < len(r.page.rows[r.rowIndex]) { if v := r.page.rows[r.rowIndex][r.valueIndex]; v.columnIndex == r.columnIndex { values[n] = v n++ } r.valueIndex++ } r.rowIndex++ r.valueIndex = 0 } if r.rowIndex == len(r.page.rows) { err = io.EOF } return n, err } type rowBufferRows struct { rows []Row index int schema *Schema } func (r *rowBufferRows) Close() error { r.index = -1 return nil } func (r *rowBufferRows) Schema() *Schema { return r.schema } func (r *rowBufferRows) SeekToRow(rowIndex int64) error { if rowIndex < 0 { return ErrSeekOutOfRange } if r.index < 0 { return io.ErrClosedPipe } maxRowIndex := int64(len(r.rows)) if rowIndex > maxRowIndex { rowIndex = maxRowIndex } r.index = int(rowIndex) return nil } func (r *rowBufferRows) ReadRows(rows []Row) (n int, err error) { if r.index < 0 { return 0, io.EOF } if n = len(r.rows) - r.index; n > len(rows) { n = len(rows) } for i, row := range r.rows[r.index : r.index+n] { rows[i] = 
append(rows[i][:0], row...) } if r.index += n; r.index == len(r.rows) { err = io.EOF } return n, err } func (r *rowBufferRows) WriteRowsTo(w RowWriter) (int64, error) { n, err := w.WriteRows(r.rows[r.index:]) r.index += n return int64(n), err } var ( _ RowGroup = (*RowBuffer[any])(nil) _ RowWriter = (*RowBuffer[any])(nil) _ sort.Interface = (*RowBuffer[any])(nil) _ RowWriterTo = (*rowBufferRows)(nil) ) ================================================ FILE: row_buffer_test.go ================================================ //go:build go1.18 package parquet_test import ( "bytes" "encoding/binary" "errors" "fmt" "io" "math/rand" "reflect" "sort" "testing" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/encoding" ) func TestRowBuffer(t *testing.T) { testRowBuffer[booleanColumn](t) testRowBuffer[int32Column](t) testRowBuffer[int64Column](t) testRowBuffer[int96Column](t) testRowBuffer[floatColumn](t) testRowBuffer[doubleColumn](t) testRowBuffer[byteArrayColumn](t) testRowBuffer[fixedLenByteArrayColumn](t) testRowBuffer[stringColumn](t) testRowBuffer[indexedStringColumn](t) testRowBuffer[uuidColumn](t) testRowBuffer[timeColumn](t) testRowBuffer[timeInMillisColumn](t) testRowBuffer[mapColumn](t) testRowBuffer[decimalColumn](t) testRowBuffer[addressBook](t) testRowBuffer[contact](t) testRowBuffer[listColumn2](t) testRowBuffer[listColumn1](t) testRowBuffer[listColumn0](t) testRowBuffer[nestedListColumn1](t) testRowBuffer[nestedListColumn](t) testRowBuffer[*contact](t) testRowBuffer[paddedBooleanColumn](t) testRowBuffer[optionalInt32Column](t) testRowBuffer[repeatedInt32Column](t) for _, test := range bufferTests { t.Run(test.scenario, func(t *testing.T) { for _, mod := range [...]struct { scenario string function func(parquet.Node) parquet.Node }{ {scenario: "optional", function: parquet.Optional}, {scenario: "repeated", function: parquet.Repeated}, {scenario: "required", function: parquet.Required}, } { t.Run(mod.scenario, func(t *testing.T) { for _, 
ordering := range [...]struct { scenario string sorting parquet.SortingColumn sortFunc func(parquet.Type, []parquet.Value) }{ {scenario: "unordered", sorting: nil, sortFunc: unordered}, {scenario: "ascending", sorting: parquet.Ascending("data"), sortFunc: ascending}, {scenario: "descending", sorting: parquet.Descending("data"), sortFunc: descending}, } { t.Run(ordering.scenario, func(t *testing.T) { schema := parquet.NewSchema("test", parquet.Group{ "data": mod.function(parquet.Leaf(test.typ)), }) options := []parquet.RowGroupOption{ schema, } if ordering.sorting != nil { options = append(options, parquet.SortingRowGroupConfig( parquet.SortingColumns(ordering.sorting), ), ) } content := new(bytes.Buffer) buffer := parquet.NewRowBuffer[any](options...) for _, values := range test.values { t.Run("", func(t *testing.T) { defer content.Reset() defer buffer.Reset() fields := schema.Fields() testRowBufferAny(t, fields[0], buffer, &parquet.Plain, values, ordering.sortFunc) }) } }) } }) } }) } } func testRowBuffer[Row any](t *testing.T) { var model Row t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) { err := quickCheck(func(rows []Row) bool { if len(rows) == 0 { return true // TODO: fix support for parquet files with zero rows } if err := testRowBufferRows(rows); err != nil { t.Error(err) return false } return true }) if err != nil { t.Error(err) } }) } func testRowBufferRows[Row any](rows []Row) error { setNullPointers(rows) buffer := parquet.NewRowBuffer[Row]() _, err := buffer.Write(rows) if err != nil { return err } reader := parquet.NewGenericRowGroupReader[Row](buffer) result := make([]Row, len(rows)) n, err := reader.Read(result) if err != nil && !errors.Is(err, io.EOF) { return err } if n < len(rows) { return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n) } if !reflect.DeepEqual(rows, result) { return fmt.Errorf("rows mismatch:\nwant: %#v\ngot: %#v", rows, result) } return nil } func testRowBufferAny(t *testing.T, node parquet.Node, 
buffer *parquet.RowBuffer[any], encoding encoding.Encoding, values []any, sortFunc sortFunc) { repetitionLevel := 0 definitionLevel := 0 if !node.Required() { definitionLevel = 1 } minValue := parquet.Value{} maxValue := parquet.Value{} batch := make([]parquet.Value, len(values)) for i := range values { batch[i] = parquet.ValueOf(values[i]).Level(repetitionLevel, definitionLevel, 0) } for i := range batch { _, err := buffer.WriteRows([]parquet.Row{batch[i : i+1]}) if err != nil { t.Fatalf("writing value to row group: %v", err) } } numRows := buffer.NumRows() if numRows != int64(len(batch)) { t.Fatalf("number of rows mismatch: want=%d got=%d", len(batch), numRows) } typ := node.Type() for _, value := range batch { if minValue.IsNull() || typ.Compare(value, minValue) < 0 { minValue = value } if maxValue.IsNull() || typ.Compare(value, maxValue) > 0 { maxValue = value } } sortFunc(typ, batch) sort.Sort(buffer) pages := buffer.ColumnChunks()[0].Pages() page, err := pages.ReadPage() defer pages.Close() if err == io.EOF { if numRows != 0 { t.Fatalf("no pages found in row buffer despite having %d rows", numRows) } else { return } } numValues := page.NumValues() if numValues != int64(len(batch)) { t.Fatalf("number of values mistmatch: want=%d got=%d", len(batch), numValues) } numNulls := page.NumNulls() if numNulls != 0 { t.Fatalf("number of nulls mismatch: want=0 got=%d", numNulls) } min, max, hasBounds := page.Bounds() if !hasBounds && numRows > 0 { t.Fatal("page bounds are missing") } if !parquet.Equal(min, minValue) { t.Fatalf("min value mismatch: want=%v got=%v", minValue, min) } if !parquet.Equal(max, maxValue) { t.Fatalf("max value mismatch: want=%v got=%v", maxValue, max) } // We write a single value per row, so num values = num rows for all pages // including repeated ones, which makes it OK to slice the pages using the // number of values as a proxy for the row indexes. 
halfValues := numValues / 2 for _, test := range [...]struct { scenario string values []parquet.Value reader parquet.ValueReader }{ {"page", batch, page.Values()}, {"head", batch[:halfValues], page.Slice(0, halfValues).Values()}, {"tail", batch[halfValues:], page.Slice(halfValues, numValues).Values()}, } { v := [1]parquet.Value{} i := 0 for { n, err := test.reader.ReadValues(v[:]) if n > 0 { if n != 1 { t.Fatalf("reading value from %q reader returned the wrong count: want=1 got=%d", test.scenario, n) } if i < len(test.values) { if !parquet.Equal(v[0], test.values[i]) { t.Fatalf("%q value at index %d mismatches: want=%v got=%v", test.scenario, i, test.values[i], v[0]) } } i++ } if err != nil { if err == io.EOF { break } t.Fatalf("reading value from %q reader: %v", test.scenario, err) } } if i != len(test.values) { t.Errorf("wrong number of values read from %q reader: want=%d got=%d", test.scenario, len(test.values), i) } } } func BenchmarkSortRowBuffer(b *testing.B) { type Row struct { I0 int64 I1 int64 I2 int64 I3 int64 I4 int64 I5 int64 I6 int64 I7 int64 I8 int64 I9 int64 ID [16]byte } buf := parquet.NewRowBuffer[Row]( parquet.SortingRowGroupConfig( parquet.SortingColumns( parquet.Ascending("ID"), ), ), ) rows := make([]Row, 10e3) prng := rand.New(rand.NewSource(0)) for i := range rows { binary.LittleEndian.PutUint64(rows[i].ID[:8], uint64(i)) binary.LittleEndian.PutUint64(rows[i].ID[8:], ^uint64(i)) } buf.Write(rows) b.ResetTimer() for i := 0; i < b.N; i++ { for j := 0; j < 10; j++ { buf.Swap(prng.Intn(len(rows)), prng.Intn(len(rows))) } sort.Sort(buf) } } func BenchmarkMergeRowBuffers(b *testing.B) { type Row struct { ID int64 `parquet:"id"` } const ( numBuffers = 100 numRowsPerBuffer = 10e3 ) rows := [numBuffers][numRowsPerBuffer]Row{} nextID := int64(0) for i := 0; i < numRowsPerBuffer; i++ { for j := 0; j < numBuffers; j++ { rows[j][i].ID = nextID nextID++ } } options := []parquet.RowGroupOption{ parquet.SortingRowGroupConfig( parquet.SortingColumns( 
parquet.Ascending("id"), ), ), } rowGroups := make([]parquet.RowGroup, numBuffers) for i := range rowGroups { buffer := parquet.NewRowBuffer[Row](options...) buffer.Write(rows[i][:]) rowGroups[i] = buffer } merge, err := parquet.MergeRowGroups(rowGroups, options...) if err != nil { b.Fatal(err) } b.ResetTimer() for i := 0; i < b.N; i++ { rows := merge.Rows() _, err := parquet.CopyRows(discardRows{}, rows) rows.Close() if err != nil { b.Fatal(err) } } } type discardRows struct{} func (discardRows) WriteRows(rows []parquet.Row) (int, error) { return len(rows), nil } ================================================ FILE: row_builder.go ================================================ package parquet // RowBuilder is a type which helps build parquet rows incrementally by adding // values to columns. type RowBuilder struct { columns [][]Value models []Value levels []columnLevel groups []*columnGroup } type columnLevel struct { repetitionDepth byte repetitionLevel byte definitionLevel byte } type columnGroup struct { baseColumn []Value members []int16 startIndex int16 endIndex int16 repetitionLevel byte definitionLevel byte } // NewRowBuilder constructs a RowBuilder which builds rows for the parquet // schema passed as argument. 
func NewRowBuilder(schema Node) *RowBuilder {
	// A RowBuilder addresses values by leaf column index, so the root of the
	// schema must be a group (a leaf has no columns to index).
	if schema.Leaf() {
		panic("schema of row builder must be a group")
	}
	n := numLeafColumnsOf(schema)
	b := &RowBuilder{
		columns: make([][]Value, n),
		models:  make([]Value, n),
		levels:  make([]columnLevel, n),
	}
	// Carve the per-column slices out of a single allocation; the three-index
	// slice gives each column a zero-length view with capacity 1 so a first
	// append does not stomp on the neighboring column's storage.
	buffers := make([]Value, len(b.columns))
	for i := range b.columns {
		b.columns[i] = buffers[i : i : i+1]
	}
	// The top-level group spans every column; its baseColumn holds a single
	// zero Value so AppendRow backfills untouched columns with one null.
	topGroup := &columnGroup{baseColumn: []Value{{}}}
	endIndex := b.configure(schema, 0, columnLevel{}, topGroup)
	topGroup.endIndex = endIndex
	b.groups = append(b.groups, topGroup)
	return b
}

// configure recursively walks the schema rooted at node, assigning leaf
// columns starting at columnIndex. It records per-column repetition and
// definition levels in b.levels, a template ("model") Value per leaf in
// b.models, and one columnGroup per repeated node in b.groups. It returns
// the column index one past the last leaf of node.
func (b *RowBuilder) configure(node Node, columnIndex int16, level columnLevel, group *columnGroup) (endIndex int16) {
	switch {
	case node.Optional():
		// Optional nodes add one definition level; recurse on the required
		// form of the node so only one of these cases applies per call.
		level.definitionLevel++

		endIndex = b.configure(Required(node), columnIndex, level, group)

		// Values under an optional node default to null when not set, so
		// erase the value portion of every model in the subtree.
		for i := columnIndex; i < endIndex; i++ {
			b.models[i].kind = 0 // null if not set
			b.models[i].ptr = nil
			b.models[i].u64 = 0
		}

	case node.Repeated():
		// Repeated nodes add one definition level and open a new column
		// group at the current repetition depth; the depth increases for
		// everything nested below this node.
		level.definitionLevel++

		group = &columnGroup{
			startIndex:      columnIndex,
			repetitionLevel: level.repetitionDepth,
			definitionLevel: level.definitionLevel,
		}

		level.repetitionDepth++

		endIndex = b.configure(Required(node), columnIndex, level, group)

		// Like optional nodes, repeated values are null when not set.
		for i := columnIndex; i < endIndex; i++ {
			b.models[i].kind = 0 // null if not set
			b.models[i].ptr = nil
			b.models[i].u64 = 0
		}

		group.endIndex = endIndex
		b.groups = append(b.groups, group)

	case node.Leaf():
		// Leaves consume exactly one column: remember its levels and build
		// the model value used to backfill the column when no value is added.
		typ := node.Type()
		kind := typ.Kind()
		model := makeValueKind(kind)
		model.repetitionLevel = level.repetitionLevel
		model.definitionLevel = level.definitionLevel
		// FIXED_LEN_BYTE_ARRAY is the only type which needs to be given a
		// non-nil zero-value if the field is required.
		if kind == FixedLenByteArray {
			zero := make([]byte, typ.Length())
			model.ptr = &zero[0]
			model.u64 = uint64(len(zero))
		}
		group.members = append(group.members, columnIndex)
		b.models[columnIndex] = model
		b.levels[columnIndex] = level
		endIndex = columnIndex + 1

	default:
		// Required group: lay out each field's leaves consecutively.
		endIndex = columnIndex
		for _, field := range node.Fields() {
			endIndex = b.configure(field, endIndex, level, group)
		}
	}
	return endIndex
}

// Add adds columnValue to the column at columnIndex.
//
// The value is stamped with the column's current repetition level and its
// static definition level; afterwards the column's repetition level is raised
// to its repetition depth so the next value added to the same column starts a
// new repeated element rather than a new record.
func (b *RowBuilder) Add(columnIndex int, columnValue Value) {
	level := &b.levels[columnIndex]
	columnValue.repetitionLevel = level.repetitionLevel
	columnValue.definitionLevel = level.definitionLevel
	// Column indexes are stored in their bit-negated form (^i).
	columnValue.columnIndex = ^int16(columnIndex)
	level.repetitionLevel = level.repetitionDepth
	b.columns[columnIndex] = append(b.columns[columnIndex], columnValue)
}

// Next must be called to indicate the start of a new repeated record for the
// column at the given index.
//
// If the column index is part of a repeated group, the builder automatically
// starts a new record for all adjacent columns, the application does not need
// to call this method for each column of the repeated group.
//
// Next must be called after adding a sequence of records.
func (b *RowBuilder) Next(columnIndex int) {
	// Find the innermost group containing columnIndex (groups are appended
	// innermost-first by configure) and rewind every member column's
	// repetition level to the group's own level, so the next Add opens a new
	// element of the enclosing repeated record.
	for _, group := range b.groups {
		if group.startIndex <= int16(columnIndex) && int16(columnIndex) < group.endIndex {
			for i := group.startIndex; i < group.endIndex; i++ {
				if level := &b.levels[i]; level.repetitionLevel != 0 {
					level.repetitionLevel = group.repetitionLevel
				}
			}
			break
		}
	}
}

// Reset clears the internal state of b, making it possible to reuse while
// retaining the internal buffers.
func (b *RowBuilder) Reset() {
	for i, column := range b.columns {
		// clearValues zeroes the stored values (dropping pointers) before the
		// slices are truncated, so the retained capacity holds no references.
		clearValues(column)
		b.columns[i] = column[:0]
	}
	for i := range b.levels {
		b.levels[i].repetitionLevel = 0
	}
}

// Row materializes the current state of b into a parquet row.
func (b *RowBuilder) Row() Row {
	// Pre-size the row with the total number of buffered values to avoid
	// growth while appending.
	numValues := 0
	for _, column := range b.columns {
		numValues += len(column)
	}
	return b.AppendRow(make(Row, 0, numValues))
}

// AppendRow appends the current state of b to row and returns it.
func (b *RowBuilder) AppendRow(row Row) Row {
	for _, group := range b.groups {
		// Find the longest member column of the group; shorter columns are
		// padded to that length so every column of the group describes the
		// same number of repeated records.
		maxColumn := group.baseColumn

		for _, columnIndex := range group.members {
			if column := b.columns[columnIndex]; len(column) > len(maxColumn) {
				maxColumn = column
			}
		}

		if len(maxColumn) != 0 {
			columns := b.columns[group.startIndex:group.endIndex]

			for i, column := range columns {
				if len(column) < len(maxColumn) {
					n := len(column)
					// Borrow maxColumn's tail for its repetition levels, then
					// overwrite each appended element with the column's model
					// value (null or zero) at the group's definition level.
					column = append(column, maxColumn[n:]...)

					columnIndex := group.startIndex + int16(i)
					model := b.models[columnIndex]

					for n < len(column) {
						v := &column[n]
						v.kind = model.kind
						v.ptr = model.ptr
						v.u64 = model.u64
						v.definitionLevel = group.definitionLevel
						v.columnIndex = ^columnIndex
						n++
					}

					columns[i] = column
				}
			}
		}
	}
	return appendRow(row, b.columns)
}



================================================
FILE: row_builder_test.go
================================================
package parquet_test

import (
	"fmt"
	"testing"

	"github.com/segmentio/parquet-go"
)

func ExampleRowBuilder() {
	builder := parquet.NewRowBuilder(parquet.Group{
		"birth_date": parquet.Optional(parquet.Date()),
		"first_name": parquet.String(),
		"last_name":  parquet.String(),
	})

	builder.Add(1, parquet.ByteArrayValue([]byte("Luke")))
	builder.Add(2, parquet.ByteArrayValue([]byte("Skywalker")))

	row := builder.Row()
	row.Range(func(columnIndex int, columnValues []parquet.Value) bool {
		fmt.Printf("%+v\n", columnValues[0])
		return true
	})

	// Output:
	// C:0 D:0 R:0 V:
	// C:1 D:0 R:0 V:Luke
	// C:2 D:0 R:0 V:Skywalker
}

func TestRowBuilder(t *testing.T) {
	// Table-driven test: each case replays a sequence of Add/Next operations
	// against a fresh builder and compares the materialized row (values with
	// their repetition/definition levels) to the expected parquet.Row.
	type (
		operation  = func(*parquet.RowBuilder)
		operations = []operation
	)

	add := func(columnIndex int, columnValue parquet.Value) operation {
		return func(b *parquet.RowBuilder) { b.Add(columnIndex, columnValue) }
	}

	next := func(columnIndex int) operation {
		return func(b *parquet.RowBuilder) { b.Next(columnIndex) }
	}

	tests := []struct {
		scenario   string
		operations operations
		want       parquet.Row
		schema     parquet.Node
	}{
		{
			scenario: "add missing required column value",
			want: parquet.Row{
				parquet.Int64Value(0).Level(0, 0, 0),
			},
			schema: parquet.Group{
				"id": parquet.Int(64),
			},
		},

		{
			scenario: "set required column value",
			operations: operations{
				add(0, parquet.Int64Value(1)),
			},
			want: parquet.Row{
				parquet.Int64Value(1).Level(0, 0, 0),
			},
			schema: parquet.Group{
				"id": parquet.Int(64),
			},
		},

		{
			scenario: "set repeated column values",
			operations: operations{
				add(0, parquet.Int64Value(1)),
				add(1, parquet.ByteArrayValue([]byte(`1`))),
				add(1, parquet.ByteArrayValue([]byte(`2`))),
				add(1, parquet.ByteArrayValue([]byte(`3`))),
			},
			want: parquet.Row{
				parquet.Int64Value(1).Level(0, 0, 0),
				parquet.ByteArrayValue([]byte(`1`)).Level(0, 1, 1),
				parquet.ByteArrayValue([]byte(`2`)).Level(1, 1, 1),
				parquet.ByteArrayValue([]byte(`3`)).Level(1, 1, 1),
			},
			schema: parquet.Group{
				"id":    parquet.Int(64),
				"names": parquet.Repeated(parquet.String()),
			},
		},

		{
			scenario: "add missing repeated column value",
			operations: operations{
				add(0, parquet.Int64Value(1)),
			},
			want: parquet.Row{
				parquet.Int64Value(1).Level(0, 0, 0),
				parquet.NullValue().Level(0, 0, 1),
			},
			schema: parquet.Group{
				"id":    parquet.Int(64),
				"names": parquet.Repeated(parquet.String()),
			},
		},

		{
			scenario: "add missing optional column value",
			operations: operations{
				add(0, parquet.Int64Value(1)),
			},
			want: parquet.Row{
				parquet.Int64Value(1).Level(0, 0, 0),
				parquet.NullValue().Level(0, 0, 1),
			},
			schema: parquet.Group{
				"id":   parquet.Int(64),
				"name": parquet.Optional(parquet.String()),
			},
		},

		{
			scenario: "add missing nested column values",
			operations: operations{
				add(0, parquet.Int64Value(1)),
			},
			want: parquet.Row{
				parquet.Int64Value(1).Level(0, 0, 0),
				parquet.NullValue().Level(0, 0, 1),
				parquet.ByteArrayValue(nil).Level(0, 0, 2),
				parquet.ByteArrayValue(nil).Level(0, 0, 3),
			},
			schema: parquet.Group{
				"id": parquet.Int(64),
				"profile": parquet.Group{
					"first_name": parquet.String(),
					"last_name":  parquet.String(),
					"birth_date": parquet.Optional(parquet.Date()),
				},
			},
		},

		{
			scenario: "add missing repeated column group",
			operations: operations{
				add(0, parquet.Int64Value(1)),
				add(2, parquet.ByteArrayValue([]byte(`me`))),
				add(1, parquet.Int32Value(0)),
				add(1, parquet.Int32Value(123456)),
				add(2, parquet.ByteArrayValue([]byte(`you`))),
			},
			want: parquet.Row{
				parquet.Int64Value(1).Level(0, 0, 0),
				parquet.Int32Value(0).Level(0, 2, 1),
				parquet.Int32Value(123456).Level(1, 2, 1),
				parquet.ByteArrayValue([]byte(`me`)).Level(0, 1, 2),
				parquet.ByteArrayValue([]byte(`you`)).Level(1, 1, 2),
				parquet.NullValue().Level(0, 1, 3),
				parquet.NullValue().Level(1, 1, 3),
			},
			schema: parquet.Group{
				"id": parquet.Int(64),
				"profiles": parquet.Repeated(parquet.Group{
					"first_name": parquet.String(),
					"last_name":  parquet.String(),
					"birth_date": parquet.Optional(parquet.Date()),
				}),
			},
		},

		{
			scenario: "empty map",
			want: parquet.Row{
				parquet.Value{}.Level(0, 0, 0),
				parquet.Value{}.Level(0, 0, 1),
			},
			schema: parquet.Group{
				"map": parquet.Repeated(parquet.Group{
					"key_value": parquet.Group{
						"key":   parquet.String(),
						"value": parquet.Optional(parquet.String()),
					},
				}),
			},
		},

		{
			scenario: "one nested maps",
			operations: operations{
				add(0, parquet.ByteArrayValue([]byte(`A`))),
				add(1, parquet.ByteArrayValue([]byte(`1`))),
				add(0, parquet.ByteArrayValue([]byte(`B`))),
				add(1, parquet.ByteArrayValue([]byte(`2`))),
			},
			want: parquet.Row{
				// objects.attributes.key_value.key
				parquet.ByteArrayValue([]byte(`A`)).Level(0, 2, 0),
				parquet.ByteArrayValue([]byte(`B`)).Level(2, 2, 0),
				// objects.attributes.key_value.value
				parquet.ByteArrayValue([]byte(`1`)).Level(0, 3, 1),
				parquet.ByteArrayValue([]byte(`2`)).Level(2, 3, 1),
			},
			schema: parquet.Group{
				"objects": parquet.Repeated(parquet.Group{
					"attributes": parquet.Repeated(parquet.Group{
						"key_value": parquet.Group{
							"key":   parquet.String(),
							"value": parquet.Optional(parquet.String()),
						},
					}),
				}),
			},
		},

		{
			scenario: "multiple nested maps",
			operations: operations{
				add(0, parquet.ByteArrayValue([]byte(`A`))),
				add(1, parquet.ByteArrayValue([]byte(`1`))),
				add(0, parquet.ByteArrayValue([]byte(`B`))),
				add(1, parquet.ByteArrayValue([]byte(`2`))),
				next(1), // same as next(0) because the columns are in the same group
				add(0, parquet.ByteArrayValue([]byte(`C`))),
				add(1, parquet.ByteArrayValue([]byte(`3`))),
			},
			want: parquet.Row{
				// objects.attributes.key_value.key
				parquet.ByteArrayValue([]byte(`A`)).Level(0, 2, 0),
				parquet.ByteArrayValue([]byte(`B`)).Level(2, 2, 0),
				parquet.ByteArrayValue([]byte(`C`)).Level(1, 2, 0),
				// objects.attributes.key_value.value
				parquet.ByteArrayValue([]byte(`1`)).Level(0, 3, 1),
				parquet.ByteArrayValue([]byte(`2`)).Level(2, 3, 1),
				parquet.ByteArrayValue([]byte(`3`)).Level(1, 3, 1),
			},
			schema: parquet.Group{
				"objects": parquet.Repeated(parquet.Group{
					"attributes": parquet.Repeated(parquet.Group{
						"key_value": parquet.Group{
							"key":   parquet.String(),
							"value": parquet.Optional(parquet.String()),
						},
					}),
				}),
			},
		},
	}

	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) {
			b := parquet.NewRowBuilder(test.schema)

			// Run each scenario twice to verify that Reset restores the
			// builder to a reusable state.
			for i := 0; i < 2; i++ {
				for _, op := range test.operations {
					op(b)
				}

				if got := b.Row(); !got.Equal(test.want) {
					t.Fatalf("test %d: rows are not equal\nwant = %+v\ngot = %+v", i+1, test.want, got)
				}

				b.Reset()
			}
		})
	}
}

func BenchmarkRowBuilderAdd(b *testing.B) {
	builder := parquet.NewRowBuilder(parquet.Group{
		"ids": parquet.Repeated(parquet.Int(64)),
	})

	for i := 0; i < b.N; i++ {
		builder.Add(0, parquet.Int64Value(int64(i)))

		if (i % 128) == 0 {
			builder.Reset() // so don't run out of memory ;)
		}
	}
}



================================================
FILE: row_group.go
================================================
package parquet

import (
	"fmt"
	"io"

	"github.com/segmentio/parquet-go/internal/debug"
)

// RowGroup is an interface representing a parquet row group. From the Parquet
// docs, a RowGroup is "a logical horizontal partitioning of the data into rows.
// There is no physical structure that is guaranteed for a row group. A row
// group consists of a column chunk for each column in the dataset."
//
// https://github.com/apache/parquet-format#glossary
type RowGroup interface {
	// Returns the number of rows in the group.
	NumRows() int64

	// Returns the list of column chunks in this row group. The chunks are
	// ordered in the order of leaf columns from the row group's schema.
	//
	// If the underlying implementation is not read-only, the returned
	// parquet.ColumnChunk may implement other interfaces: for example,
	// parquet.ColumnBuffer if the chunk is backed by an in-memory buffer,
	// or typed writer interfaces like parquet.Int32Writer depending on the
	// underlying type of values that can be written to the chunk.
	//
	// As an optimization, the row group may return the same slice across
	// multiple calls to this method. Applications should treat the returned
	// slice as read-only.
	ColumnChunks() []ColumnChunk

	// Returns the schema of rows in the group.
	Schema() *Schema

	// Returns the list of sorting columns describing how rows are sorted in the
	// group.
	//
	// The method will return an empty slice if the rows are not sorted.
	SortingColumns() []SortingColumn

	// Returns a reader exposing the rows of the row group.
	//
	// As an optimization, the returned parquet.Rows object may implement
	// parquet.RowWriterTo, and test the RowWriter it receives for an
	// implementation of the parquet.RowGroupWriter interface.
	//
	// This optimization mechanism is leveraged by the parquet.CopyRows function
	// to skip the generic row-by-row copy algorithm and delegate the copy logic
	// to the parquet.Rows object.
	Rows() Rows
}

// Rows is an interface implemented by row readers returned by calling the Rows
// method of RowGroup instances.
//
// Applications should call Close when they are done using a Rows instance in
// order to release the underlying resources held by the row sequence.
//
// After calling Close, all attempts to read more rows will return io.EOF.
type Rows interface {
	RowReaderWithSchema
	RowSeeker
	io.Closer
}

// RowGroupReader is an interface implemented by types that expose sequences of
// row groups to the application.
type RowGroupReader interface {
	ReadRowGroup() (RowGroup, error)
}

// RowGroupWriter is an interface implemented by types that allow the program
// to write row groups.
type RowGroupWriter interface {
	WriteRowGroup(RowGroup) (int64, error)
}

// SortingColumn represents a column by which a row group is sorted.
type SortingColumn interface {
	// Returns the path of the column in the row group schema, omitting the name
	// of the root node.
	Path() []string

	// Returns true if the column will sort values in descending order.
	Descending() bool

	// Returns true if the column will put null values at the beginning.
	NullsFirst() bool
}

// Ascending constructs a SortingColumn value which dictates to sort the column
// at the path given as argument in ascending order.
func Ascending(path ...string) SortingColumn { return ascending(path) }

// Descending constructs a SortingColumn value which dictates to sort the column
// at the path given as argument in descending order.
func Descending(path ...string) SortingColumn { return descending(path) }

// NullsFirst wraps the SortingColumn passed as argument so that it instructs
// the row group to place null values first in the column.
func NullsFirst(sortingColumn SortingColumn) SortingColumn { return nullsFirst{sortingColumn} }

// ascending is the SortingColumn implementation returned by Ascending; the
// value is the column path itself.
type ascending []string

func (asc ascending) String() string { return fmt.Sprintf("ascending(%s)", columnPath(asc)) }

func (asc ascending) Path() []string { return asc }

func (asc ascending) Descending() bool { return false }

func (asc ascending) NullsFirst() bool { return false }

// descending is the SortingColumn implementation returned by Descending.
type descending []string

func (desc descending) String() string { return fmt.Sprintf("descending(%s)", columnPath(desc)) }

func (desc descending) Path() []string { return desc }

func (desc descending) Descending() bool { return true }

func (desc descending) NullsFirst() bool { return false }

// nullsFirst decorates another SortingColumn, overriding only NullsFirst.
type nullsFirst struct{ SortingColumn }

func (nf nullsFirst) String() string { return fmt.Sprintf("nulls_first+%s", nf.SortingColumn) }

func (nf nullsFirst) NullsFirst() bool { return true }

// searchSortingColumn returns the index of the sorting column matching path,
// or len(sortingColumns) if no column matches.
func searchSortingColumn(sortingColumns []SortingColumn, path columnPath) int {
	// There are usually a few sorting columns in a row group, so the linear
	// scan is the fastest option and works whether the sorting column list
	// is sorted or not. Please revisit this decision if this code path ends
	// up being more costly than necessary.
	for i, sorting := range sortingColumns {
		if path.equal(sorting.Path()) {
			return i
		}
	}
	return len(sortingColumns)
}

// sortingColumnsHavePrefix reports whether prefix is a leading sub-sequence of
// sortingColumns (element-wise equality of path, direction, and null order).
func sortingColumnsHavePrefix(sortingColumns, prefix []SortingColumn) bool {
	if len(sortingColumns) < len(prefix) {
		return false
	}
	for i, sortingColumn := range prefix {
		if !sortingColumnsAreEqual(sortingColumns[i], sortingColumn) {
			return false
		}
	}
	return true
}

// sortingColumnsAreEqual reports whether two sorting columns have the same
// path, sort direction, and null ordering.
func sortingColumnsAreEqual(s1, s2 SortingColumn) bool {
	path1 := columnPath(s1.Path())
	path2 := columnPath(s2.Path())
	return path1.equal(path2) && s1.Descending() == s2.Descending() && s1.NullsFirst() == s2.NullsFirst()
}

// rowGroup is a plain in-memory implementation of the RowGroup interface.
type rowGroup struct {
	schema  *Schema
	numRows int64
	columns []ColumnChunk
	sorting []SortingColumn
}

func (r *rowGroup) NumRows() int64 { return r.numRows }

func (r *rowGroup) ColumnChunks() []ColumnChunk { return r.columns }

func (r *rowGroup) SortingColumns() []SortingColumn { return r.sorting }

func (r *rowGroup) Schema() *Schema { return r.schema }

func (r *rowGroup) Rows() Rows { return newRowGroupRows(r, ReadModeSync) }

// NewRowGroupRowReader constructs a Rows reader over the given row group,
// reading pages synchronously.
func NewRowGroupRowReader(rowGroup RowGroup) Rows {
	return newRowGroupRows(rowGroup, ReadModeSync)
}

// rowGroupRows is the Rows implementation used to read rows back out of a
// RowGroup, one page reader per column. Initialization is lazy (see init);
// after Close all reads return io.EOF.
type rowGroupRows struct {
	rowGroup     RowGroup
	buffers      []Value // one columnBufferSize-sized region per column
	readers      []Pages
	columns      []columnChunkRows
	inited       bool
	closed       bool
	done         chan<- struct{} // closed to stop async page readers
	pageReadMode ReadMode
}

// columnChunkRows tracks the read state of a single column: the current page,
// how many of its rows remain unread, and a window [offset,length) into the
// column's value buffer.
type columnChunkRows struct {
	rows   int64
	offset int32
	length int32
	page   Page
	values ValueReader
}

const columnBufferSize = defaultValueBufferSize

// buffer returns the i-th column's slice of the shared value buffer; the
// three-index slice caps it so appends cannot spill into a neighbor's region.
func (r *rowGroupRows) buffer(i int) []Value {
	j := (i + 0) * columnBufferSize
	k := (i + 1) * columnBufferSize
	return r.buffers[j:k:k]
}

func newRowGroupRows(rowGroup RowGroup, pageReadMode ReadMode) *rowGroupRows {
	return &rowGroupRows{
		rowGroup:     rowGroup,
		pageReadMode: pageReadMode,
	}
}

// init allocates the per-column state and opens one Pages reader per column
// chunk, either synchronously or through asyncPages depending on pageReadMode.
func (r *rowGroupRows) init() {
	columns := r.rowGroup.ColumnChunks()

	r.buffers = make([]Value, len(columns)*columnBufferSize)
	r.readers = make([]Pages, len(columns))
	r.columns = make([]columnChunkRows, len(columns))

	switch r.pageReadMode {
	case ReadModeAsync:
		done := make(chan struct{})
		r.done = done
		readers := make([]asyncPages, len(columns))
		for i, column := range columns {
			readers[i].init(column.Pages(), done)
			r.readers[i] = &readers[i]
		}
	case ReadModeSync:
		for i, column := range columns {
			r.readers[i] = column.Pages()
		}
	default:
		panic(fmt.Sprintf("parquet: invalid page read mode: %d", r.pageReadMode))
	}

	r.inited = true
	// This finalizer is used to ensure that the goroutines started by calling
	// init on the underlying page readers will be shutdown in the event that
	// Close isn't called and the rowGroupRows object is garbage collected.
	debug.SetFinalizer(r, func(r *rowGroupRows) { r.Close() })
}

// clear releases the current pages and zeroes all per-column state and value
// buffers, dropping any pointers they held.
func (r *rowGroupRows) clear() {
	for i := range r.columns {
		Release(r.columns[i].page)
	}

	for i := range r.columns {
		r.columns[i] = columnChunkRows{}
	}

	for i := range r.buffers {
		r.buffers[i] = Value{}
	}
}

// Reset rewinds every column's page reader to the first row and discards all
// buffered state.
func (r *rowGroupRows) Reset() {
	for i := range r.readers {
		// Ignore errors because we are resetting the reader, if the error
		// persists we will see it on the next read, and otherwise we can
		// read back from the beginning.
		r.readers[i].SeekToRow(0)
	}
	r.clear()
}

// Close stops any async page readers, closes every column's page reader, and
// releases buffered pages. It returns the last close error encountered, if
// any. After Close, ReadRows returns io.EOF.
func (r *rowGroupRows) Close() error {
	var lastErr error

	if r.done != nil {
		close(r.done)
		r.done = nil
	}

	for i := range r.readers {
		if err := r.readers[i].Close(); err != nil {
			lastErr = err
		}
	}

	r.clear()
	// Mark inited so a ReadRows after Close does not re-open page readers.
	r.inited = true
	r.closed = true
	return lastErr
}

// SeekToRow positions every column's page reader at rowIndex, discarding any
// buffered state. It returns the last seek error encountered, if any.
func (r *rowGroupRows) SeekToRow(rowIndex int64) error {
	var lastErr error

	if r.closed {
		return io.ErrClosedPipe
	}

	if !r.inited {
		r.init()
	}

	for i := range r.readers {
		if err := r.readers[i].SeekToRow(rowIndex); err != nil {
			lastErr = err
		}
	}

	r.clear()
	return lastErr
}

// ReadRows reads up to len(rows) rows, loading the next page of any column
// whose current page is exhausted, then delegates to readRows to assemble the
// row values column by column.
func (r *rowGroupRows) ReadRows(rows []Row) (int, error) {
	if r.closed {
		return 0, io.EOF
	}

	if !r.inited {
		r.init()
	}

	// Limit the number of rows that we read to the smallest number of rows
	// remaining in the current page of each column. This is necessary because
	// the pointers exposed to the returned rows need to remain valid until the
	// next call to ReadRows, SeekToRow, Reset, or Close. If we release one of
	// the columns' page, the rows that were already read during the ReadRows
	// call would be invalidated, and might reference memory locations that have
	// been reused due to pooling of page buffers.
	numRows := int64(len(rows))

	for i := range r.columns {
		c := &r.columns[i]
		// When all rows of the current page of a column have been consumed we
		// have to read the next page. This will effectively invalidate all
		// pointers of values previously held in the page, which is valid if
		// the application respects the RowReader interface and does not retain
		// parquet values without cloning them first.
		for c.rows == 0 {
			var err error
			clearValues(r.buffer(i))

			c.offset = 0
			c.length = 0
			c.values = nil
			Release(c.page)

			c.page, err = r.readers[i].ReadPage()
			if err != nil {
				if err != io.EOF {
					return 0, err
				}
				// io.EOF: this column has no more pages; leave c.rows at 0.
				break
			}

			c.rows = c.page.NumRows()
			c.values = c.page.Values()
		}

		if c.rows < numRows {
			numRows = c.rows
		}
	}

	// Truncate the output rows before appending values to them.
	for i := range rows {
		rows[i] = rows[i][:0]
	}

	if numRows == 0 {
		return 0, io.EOF
	}

	n, err := r.readRows(rows[:numRows])

	for i := range r.columns {
		r.columns[i].rows -= int64(n)
	}

	return n, err
}

func (r *rowGroupRows) Schema() *Schema { return r.rowGroup.Schema() }

// readRows assembles len(rows) rows by appending, for each column, one row's
// worth of values: the value at the current offset plus every following value
// whose repetition level is non-zero (i.e. belonging to the same row).
func (r *rowGroupRows) readRows(rows []Row) (int, error) {
	for i := range rows {
	readColumns:
		for columnIndex := range r.columns {
			col := &r.columns[columnIndex]
			buf := r.buffer(columnIndex)

			// skip is 1 for the first value of the row (repetition level 0
			// marks a row boundary, so it must not terminate the scan), then
			// 0 when the row continues across a buffer refill.
			skip := int32(1)
			for {
				if col.offset == col.length {
					n, err := col.values.ReadValues(buf)
					if n == 0 {
						switch err {
						case nil:
							err = io.ErrNoProgress
						case io.EOF:
							// Column exhausted for this page; move on to the
							// next column.
							continue readColumns
						}
						return i, err
					}
					col.offset = 0
					col.length = int32(n)
				}

				// Hint to the compiler that offset and length are within
				// bounds of buf so the loop below avoids bounds checks.
				_ = buf[:col.offset]
				_ = buf[:col.length]
				endOffset := col.offset + skip

				for endOffset < col.length && buf[endOffset].repetitionLevel != 0 {
					endOffset++
				}

				rows[i] = append(rows[i], buf[col.offset:endOffset]...)

				if col.offset = endOffset; col.offset < col.length {
					// Stopped at the next row boundary within the buffer.
					break
				}

				// Buffer exhausted mid-row: refill and keep scanning without
				// skipping the boundary check on the first refilled value.
				skip = 0
			}
		}
	}
	return len(rows), nil
}

// seekRowGroup presents a RowGroup whose first seek rows are skipped.
type seekRowGroup struct {
	base    RowGroup
	seek    int64
	columns []ColumnChunk
}

func (g *seekRowGroup) NumRows() int64 { return g.base.NumRows() - g.seek }

func (g *seekRowGroup) ColumnChunks() []ColumnChunk { return g.columns }

func (g *seekRowGroup) Schema() *Schema { return g.base.Schema() }

func (g *seekRowGroup) SortingColumns() []SortingColumn { return g.base.SortingColumns() }

func (g *seekRowGroup) Rows() Rows {
	rows := g.base.Rows()
	rows.SeekToRow(g.seek)
	return rows
}

// seekColumnChunk delegates to a base ColumnChunk, seeking its Pages readers
// to the configured row offset.
type seekColumnChunk struct {
	base ColumnChunk
	seek int64
}

func (c *seekColumnChunk) Type() Type { return c.base.Type() }

func (c *seekColumnChunk) Column() int { return c.base.Column() }

func (c *seekColumnChunk) Pages() Pages {
	pages := c.base.Pages()
	pages.SeekToRow(c.seek)
	return pages
}

func (c *seekColumnChunk) ColumnIndex() ColumnIndex { return c.base.ColumnIndex() }

func (c *seekColumnChunk) OffsetIndex() OffsetIndex { return c.base.OffsetIndex() }

func (c *seekColumnChunk) BloomFilter() BloomFilter { return c.base.BloomFilter() }

func (c *seekColumnChunk) NumValues() int64 { return c.base.NumValues() }

// emptyRowGroup is a RowGroup with a schema but zero rows.
type emptyRowGroup struct {
	schema  *Schema
	columns []ColumnChunk
}

// newEmptyRowGroup builds an emptyRowGroup with one empty column chunk per
// leaf column of the schema, preserving each column's type and index.
func newEmptyRowGroup(schema *Schema) *emptyRowGroup {
	columns := schema.Columns()
	rowGroup := &emptyRowGroup{
		schema:  schema,
		columns: make([]ColumnChunk, len(columns)),
	}
	emptyColumnChunks := make([]emptyColumnChunk, len(columns))
	for i, column := range schema.Columns() {
		leaf, _ := schema.Lookup(column...)
		emptyColumnChunks[i].typ = leaf.Node.Type()
		emptyColumnChunks[i].column = int16(leaf.ColumnIndex)
		rowGroup.columns[i] = &emptyColumnChunks[i]
	}
	return rowGroup
}

func (g *emptyRowGroup) NumRows() int64 { return 0 }

func (g *emptyRowGroup) ColumnChunks() []ColumnChunk { return g.columns }

func (g *emptyRowGroup) Schema() *Schema { return g.schema }

func (g *emptyRowGroup) SortingColumns() []SortingColumn { return nil }

func (g *emptyRowGroup) Rows() Rows { return emptyRows{g.schema} }

// emptyColumnChunk is a ColumnChunk with no pages and no values.
type emptyColumnChunk struct {
	typ    Type
	column int16
}

func (c *emptyColumnChunk) Type() Type { return c.typ }

func (c *emptyColumnChunk) Column() int { return int(c.column) }

func (c *emptyColumnChunk) Pages() Pages { return emptyPages{} }

func (c *emptyColumnChunk) ColumnIndex() ColumnIndex { return emptyColumnIndex{} }

func (c *emptyColumnChunk) OffsetIndex() OffsetIndex { return emptyOffsetIndex{} }

func (c *emptyColumnChunk) BloomFilter() BloomFilter { return emptyBloomFilter{} }

func (c *emptyColumnChunk) NumValues() int64 { return 0 }

// emptyBloomFilter is a zero-size bloom filter that matches no value.
type emptyBloomFilter struct{}

func (emptyBloomFilter) ReadAt([]byte, int64) (int, error) { return 0, io.EOF }

func (emptyBloomFilter) Size() int64 { return 0 }

func (emptyBloomFilter) Check(Value) (bool, error) { return false, nil }

// emptyRows is the Rows implementation of emptyRowGroup: immediate io.EOF on
// read, no-op seek/close/copy.
type emptyRows struct{ schema *Schema }

func (r emptyRows) Close() error { return nil }

func (r emptyRows) Schema() *Schema { return r.schema }

func (r emptyRows) ReadRows([]Row) (int, error) { return 0, io.EOF }

func (r emptyRows) SeekToRow(int64) error { return nil }

func (r emptyRows) WriteRowsTo(RowWriter) (int64, error) { return 0, nil }

// emptyPages is a page sequence with no pages.
type emptyPages struct{}

func (emptyPages) ReadPage() (Page, error) { return nil, io.EOF }

func (emptyPages) SeekToRow(int64) error { return nil }

func (emptyPages) Close() error { return nil }

// Compile-time interface satisfaction checks.
var (
	_ RowReaderWithSchema = (*rowGroupRows)(nil)
	//_ RowWriterTo = (*rowGroupRows)(nil)

	_ RowReaderWithSchema = emptyRows{}
	_ RowWriterTo = emptyRows{}
)
================================================ FILE: row_group_test.go ================================================ package parquet_test import ( "bytes" "io" "reflect" "sort" "testing" "github.com/segmentio/parquet-go" ) func sortedRowGroup(options []parquet.RowGroupOption, rows ...interface{}) parquet.RowGroup { buf := parquet.NewBuffer(options...) for _, row := range rows { buf.Write(row) } sort.Stable(buf) return buf } type Person struct { FirstName utf8string LastName utf8string Age int } type LastNameOnly struct { LastName utf8string } func newPeopleBuffer(people []Person) parquet.RowGroup { buffer := parquet.NewBuffer() for i := range people { buffer.Write(&people[i]) } return buffer } func newPeopleFile(people []Person) parquet.RowGroup { buffer := new(bytes.Buffer) writer := parquet.NewWriter(buffer) for i := range people { writer.Write(&people[i]) } writer.Close() reader := bytes.NewReader(buffer.Bytes()) f, err := parquet.OpenFile(reader, reader.Size()) if err != nil { panic(err) } return f.RowGroups()[0] } func TestSeekToRow(t *testing.T) { for _, config := range []struct { name string newRowGroup func([]Person) parquet.RowGroup }{ {name: "buffer", newRowGroup: newPeopleBuffer}, {name: "file", newRowGroup: newPeopleFile}, } { t.Run(config.name, func(t *testing.T) { testSeekToRow(t, config.newRowGroup) }) } } func testSeekToRow(t *testing.T, newRowGroup func([]Person) parquet.RowGroup) { err := quickCheck(func(people []Person) bool { if len(people) == 0 { // TODO: fix creation of empty parquet files return true } rowGroup := newRowGroup(people) rows := rowGroup.Rows() rbuf := make([]parquet.Row, 1) pers := Person{} schema := parquet.SchemaOf(&pers) defer rows.Close() for i := range people { if err := rows.SeekToRow(int64(i)); err != nil { t.Errorf("seeking to row %d: %+v", i, err) return false } if _, err := rows.ReadRows(rbuf); err != nil { t.Errorf("reading row %d: %+v", i, err) return false } if err := schema.Reconstruct(&pers, rbuf[0]); err != 
nil { t.Errorf("deconstructing row %d: %+v", i, err) return false } if !reflect.DeepEqual(&pers, &people[i]) { t.Errorf("row %d mismatch", i) return false } } return true }) if err != nil { t.Error(err) } } func selfRowGroup(rowGroup parquet.RowGroup) parquet.RowGroup { return rowGroup } func fileRowGroup(rowGroup parquet.RowGroup) parquet.RowGroup { buffer := new(bytes.Buffer) writer := parquet.NewWriter(buffer) if _, err := writer.WriteRowGroup(rowGroup); err != nil { panic(err) } if err := writer.Close(); err != nil { panic(err) } reader := bytes.NewReader(buffer.Bytes()) f, err := parquet.OpenFile(reader, reader.Size()) if err != nil { panic(err) } return f.RowGroups()[0] } func TestWriteRowGroupClosesRows(t *testing.T) { var rows []*wrappedRows rg := wrappedRowGroup{ RowGroup: newPeopleFile([]Person{{}}), rowsCallback: func(r parquet.Rows) parquet.Rows { wrapped := &wrappedRows{Rows: r} rows = append(rows, wrapped) return wrapped }, } writer := parquet.NewWriter(io.Discard) if _, err := writer.WriteRowGroup(rg); err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } for _, r := range rows { if !r.closed { t.Fatal("rows not closed") } } } ================================================ FILE: row_test.go ================================================ package parquet_test import ( "io" "reflect" "testing" "github.com/google/uuid" "github.com/segmentio/parquet-go" ) type bufferedRows struct { rows []parquet.Row } func (r *bufferedRows) ReadRows(rows []parquet.Row) (int, error) { for i := range rows { if len(r.rows) == 0 { return i, io.EOF } rows[i] = append(rows[i][:0], r.rows[0]...) 
r.rows = r.rows[1:] } return len(rows), nil } func (w *bufferedRows) WriteRows(rows []parquet.Row) (int, error) { for _, row := range rows { w.rows = append(w.rows, row.Clone()) } return len(rows), nil } func TestMultiRowWriter(t *testing.T) { b1 := new(bufferedRows) b2 := new(bufferedRows) mw := parquet.MultiRowWriter(b1, b2) rows := []parquet.Row{ { parquet.Int32Value(10).Level(0, 0, 0), parquet.Int32Value(11).Level(0, 0, 1), parquet.Int32Value(12).Level(0, 0, 2), }, { parquet.Int32Value(20).Level(0, 0, 0), parquet.Int32Value(21).Level(0, 0, 1), parquet.Int32Value(22).Level(0, 0, 2), }, } n, err := mw.WriteRows(rows) if err != nil { t.Fatal(err) } if n != len(rows) { t.Fatalf("number of rows written mismatch: got=%d want=%d", n, len(rows)) } assertEqualRows(t, rows, b1.rows) assertEqualRows(t, rows, b2.rows) } func TestRowClone(t *testing.T) { row := parquet.Row{ parquet.ValueOf(42).Level(0, 1, 0), parquet.ValueOf("Hello World").Level(1, 1, 1), } if clone := row.Clone(); !row.Equal(clone) { t.Error("row and its clone are not equal") } } func TestDeconstructionReconstruction(t *testing.T) { type Person struct { FirstName string LastName string Age int `parquet:",optional"` Weight float64 `parquet:",optional"` } type Details struct { Person *Person } type Friend struct { ID [16]byte `parquet:",uuid"` Details *Details } type User struct { ID [16]byte `parquet:",uuid"` Details *Details Friends []Friend `parquet:",list,optional"` } type List2 struct { Value string `parquet:",optional"` } type List1 struct { List2 []List2 `parquet:",list"` } type List0 struct { List1 []List1 `parquet:",list"` } type nestedListsLevel1 struct { Level2 []string `parquet:"level2"` } type nestedLists struct { Level1 []nestedListsLevel1 `parquet:"level1"` } tests := []struct { scenario string input interface{} values [][]parquet.Value }{ { scenario: "single field", input: struct { Name string }{Name: "Luke"}, values: [][]parquet.Value{ 0: {parquet.ValueOf("Luke").Level(0, 0, 0)}, }, }, { 
scenario: "multiple fields", input: Person{ FirstName: "Han", LastName: "Solo", Age: 42, Weight: 81.5, }, values: [][]parquet.Value{ 0: {parquet.ValueOf("Han").Level(0, 0, 0)}, 1: {parquet.ValueOf("Solo").Level(0, 0, 1)}, 2: {parquet.ValueOf(42).Level(0, 1, 2)}, 3: {parquet.ValueOf(81.5).Level(0, 1, 3)}, }, }, { scenario: "empty repeated field", input: struct { Symbols []string }{ Symbols: []string{}, }, values: [][]parquet.Value{ 0: {parquet.ValueOf(nil).Level(0, 0, 0)}, }, }, { scenario: "single repeated field", input: struct { Symbols []string }{ Symbols: []string{"EUR", "USD", "GBP", "JPY"}, }, values: [][]parquet.Value{ 0: { parquet.ValueOf("EUR").Level(0, 1, 0), parquet.ValueOf("USD").Level(1, 1, 0), parquet.ValueOf("GBP").Level(1, 1, 0), parquet.ValueOf("JPY").Level(1, 1, 0), }, }, }, { scenario: "multiple repeated field", input: struct { Symbols []string Values []float32 }{ Symbols: []string{"EUR", "USD", "GBP", "JPY"}, Values: []float32{0.1, 0.2, 0.3, 0.4}, }, values: [][]parquet.Value{ 0: { parquet.ValueOf("EUR").Level(0, 1, 0), parquet.ValueOf("USD").Level(1, 1, 0), parquet.ValueOf("GBP").Level(1, 1, 0), parquet.ValueOf("JPY").Level(1, 1, 0), }, 1: { parquet.ValueOf(float32(0.1)).Level(0, 1, 0), parquet.ValueOf(float32(0.2)).Level(1, 1, 0), parquet.ValueOf(float32(0.3)).Level(1, 1, 0), parquet.ValueOf(float32(0.4)).Level(1, 1, 0), }, }, }, { scenario: "top level nil pointer field", input: struct { Person *Person }{ Person: nil, }, // Here there are four nil values because the Person type has four // fields but it is nil. 
values: [][]parquet.Value{ 0: {parquet.ValueOf(nil).Level(0, 0, 0)}, 1: {parquet.ValueOf(nil).Level(0, 0, 0)}, 2: {parquet.ValueOf(nil).Level(0, 0, 0)}, 3: {parquet.ValueOf(nil).Level(0, 0, 0)}, }, }, { scenario: "top level slice pointer", input: struct { List []*List2 }{ List: []*List2{ {Value: "foo"}, {Value: "bar"}, }, }, values: [][]parquet.Value{ 0: { parquet.ValueOf("foo").Level(0, 2, 0), parquet.ValueOf("bar").Level(1, 2, 0), }, }, }, { scenario: "sub level nil pointer field", input: User{ ID: uuid.MustParse("A65B576D-9299-4769-9D93-04BE0583F027"), Details: &Details{ Person: nil, }, }, // Here there are four nil values because the Person type has four // fields but it is nil. values: [][]parquet.Value{ // User.ID 0: {parquet.ValueOf(uuid.MustParse("A65B576D-9299-4769-9D93-04BE0583F027"))}, // User.Details.Person 1: {parquet.ValueOf(nil).Level(0, 1, 0)}, 2: {parquet.ValueOf(nil).Level(0, 1, 0)}, 3: {parquet.ValueOf(nil).Level(0, 1, 0)}, 4: {parquet.ValueOf(nil).Level(0, 1, 0)}, // User.Friends.ID 5: {parquet.ValueOf(nil).Level(0, 0, 0)}, // User.Friends.Details.Person 6: {parquet.ValueOf(nil).Level(0, 0, 0)}, 7: {parquet.ValueOf(nil).Level(0, 0, 0)}, 8: {parquet.ValueOf(nil).Level(0, 0, 0)}, 9: {parquet.ValueOf(nil).Level(0, 0, 0)}, }, }, { scenario: "deeply nested structure", input: struct { User User }{ User: User{ ID: uuid.MustParse("A65B576D-9299-4769-9D93-04BE0583F027"), Details: &Details{ Person: &Person{ FirstName: "Luke", LastName: "Skywalker", }, }, Friends: []Friend{ { ID: uuid.MustParse("1B76F8D0-82C6-403F-A104-DCDA69207220"), Details: &Details{ Person: &Person{ FirstName: "Han", LastName: "Solo", }, }, }, { ID: uuid.MustParse("C43C8852-CCE5-40E6-B0DF-7212A5633346"), Details: &Details{ Person: &Person{ FirstName: "Leia", LastName: "Skywalker", }, }, }, { ID: uuid.MustParse("E78642A8-0931-4D5F-918F-24DC8FF445B0"), Details: &Details{ Person: &Person{ FirstName: "C3PO", LastName: "Droid", }, }, }, }, }, }, values: [][]parquet.Value{ // User.ID 0: 
{parquet.ValueOf(uuid.MustParse("A65B576D-9299-4769-9D93-04BE0583F027"))}, // User.Details 1: {parquet.ValueOf("Luke").Level(0, 2, 0)}, 2: {parquet.ValueOf("Skywalker").Level(0, 2, 0)}, 3: {parquet.ValueOf(nil).Level(0, 2, 0)}, 4: {parquet.ValueOf(nil).Level(0, 2, 0)}, 5: { // User.Friends.ID parquet.ValueOf(uuid.MustParse("1B76F8D0-82C6-403F-A104-DCDA69207220")).Level(0, 2, 0), parquet.ValueOf(uuid.MustParse("C43C8852-CCE5-40E6-B0DF-7212A5633346")).Level(1, 2, 0), parquet.ValueOf(uuid.MustParse("E78642A8-0931-4D5F-918F-24DC8FF445B0")).Level(1, 2, 0), }, 6: { // User.Friends.Details.Person.FirstName parquet.ValueOf("Han").Level(0, 4, 0), parquet.ValueOf("Leia").Level(1, 4, 0), parquet.ValueOf("C3PO").Level(1, 4, 0), }, 7: { // User.Friends.Details.Person.LastName parquet.ValueOf("Solo").Level(0, 4, 0), parquet.ValueOf("Skywalker").Level(1, 4, 0), parquet.ValueOf("Droid").Level(1, 4, 0), }, 8: { // User.Friends.Details.Person.Age parquet.ValueOf(nil).Level(0, 4, 0), parquet.ValueOf(nil).Level(1, 4, 0), parquet.ValueOf(nil).Level(1, 4, 0), }, 9: { // User.Friends.Details.Person.Weight parquet.ValueOf(nil).Level(0, 4, 0), parquet.ValueOf(nil).Level(1, 4, 0), parquet.ValueOf(nil).Level(1, 4, 0), }, }, }, { scenario: "multiple repeated levels", input: List0{ List1: []List1{ {List2: []List2{{Value: "A"}, {Value: "B"}}}, {List2: []List2{}}, // parquet doesn't differentiate between empty repeated and a nil list {List2: []List2{{Value: "C"}}}, {List2: []List2{}}, {List2: []List2{{Value: "D"}, {Value: "E"}, {Value: "F"}}}, {List2: []List2{{Value: "G"}, {Value: "H"}, {Value: "I"}}}, }, }, values: [][]parquet.Value{ { parquet.ValueOf("A").Level(0, 3, 0), parquet.ValueOf("B").Level(2, 3, 0), parquet.ValueOf(nil).Level(1, 1, 0), parquet.ValueOf("C").Level(1, 3, 0), parquet.ValueOf(nil).Level(1, 1, 0), parquet.ValueOf("D").Level(1, 3, 0), parquet.ValueOf("E").Level(2, 3, 0), parquet.ValueOf("F").Level(2, 3, 0), parquet.ValueOf("G").Level(1, 3, 0), parquet.ValueOf("H").Level(2, 3, 
0), parquet.ValueOf("I").Level(2, 3, 0), }, }, }, // https://blog.twitter.com/engineering/en_us/a/2013/dremel-made-simple-with-parquet // message nestedLists { // repeated group level1 { // repeated string level2; // } // } // --- // { // level1: { // level2: a // level2: b // level2: c // }, // level1: { // level2: d // level2: e // level2: f // level2: g // } // } // { scenario: "twitter blog example 1", input: nestedLists{ Level1: []nestedListsLevel1{ {Level2: []string{"a", "b", "c"}}, {Level2: []string{"d", "e", "f", "g"}}, }, }, values: [][]parquet.Value{ 0: { parquet.ValueOf("a").Level(0, 2, 0), parquet.ValueOf("b").Level(2, 2, 0), parquet.ValueOf("c").Level(2, 2, 0), parquet.ValueOf("d").Level(1, 2, 0), parquet.ValueOf("e").Level(2, 2, 0), parquet.ValueOf("f").Level(2, 2, 0), parquet.ValueOf("g").Level(2, 2, 0), }, }, }, // message nestedLists { // repeated group level1 { // repeated string level2; // } // } // --- // { // level1: { // level2: h // }, // level1: { // level2: i // level2: j // } // } // { scenario: "twitter blog example 2", input: nestedLists{ Level1: []nestedListsLevel1{ {Level2: []string{"h"}}, {Level2: []string{"i", "j"}}, }, }, values: [][]parquet.Value{ 0: { parquet.ValueOf("h").Level(0, 2, 0), parquet.ValueOf("i").Level(1, 2, 0), parquet.ValueOf("j").Level(2, 2, 0), }, }, }, // message AddressBook { // required string owner; // repeated string ownerPhoneNumbers; // repeated group contacts { // required string name; // optional string phoneNumber; // } // } // --- // AddressBook { // owner: "Julien Le Dem", // ownerPhoneNumbers: "555 123 4567", // ownerPhoneNumbers: "555 666 1337", // contacts: { // name: "Dmitriy Ryaboy", // phoneNumber: "555 987 6543", // }, // contacts: { // name: "Chris Aniszczyk" // } // } { scenario: "twitter blog example 3", input: AddressBook{ Owner: "Julien Le Dem", OwnerPhoneNumbers: []string{ "555 123 4567", "555 666 1337", }, Contacts: []Contact{ { Name: "Dmitriy Ryaboy", PhoneNumber: "555 987 6543", }, { 
Name: "Chris Aniszczyk",
					},
				},
			},
			values: [][]parquet.Value{
				0: { // AddressBook.owner
					parquet.ValueOf("Julien Le Dem").Level(0, 0, 0),
				},
				1: { // AddressBook.ownerPhoneNumbers
					parquet.ValueOf("555 123 4567").Level(0, 1, 0),
					parquet.ValueOf("555 666 1337").Level(1, 1, 0),
				},
				2: { // AddressBook.contacts.name
					parquet.ValueOf("Dmitriy Ryaboy").Level(0, 1, 0),
					parquet.ValueOf("Chris Aniszczyk").Level(1, 1, 0),
				},
				3: { // AddressBook.contacts.phoneNumber
					parquet.ValueOf("555 987 6543").Level(0, 2, 0),
					parquet.ValueOf(nil).Level(1, 1, 0),
				},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) {
			schema := parquet.SchemaOf(test.input)
			row := schema.Deconstruct(nil, test.input)
			values := columnsOf(row)

			t.Logf("\n%s", schema)

			// Check every expected column against the deconstructed output.
			for columnIndex, expect := range test.values {
				assertEqualValues(t, columnIndex, expect, values[columnIndex])
			}

			// Round-trip: reconstructing the row must reproduce the input.
			newValue := reflect.New(reflect.TypeOf(test.input))
			if err := schema.Reconstruct(newValue.Interface(), row); err != nil {
				t.Errorf("reconstruction of the parquet row into a go value failed:\n\t%v", err)
			} else if !reflect.DeepEqual(newValue.Elem().Interface(), test.input) {
				t.Errorf("reconstruction of the parquet row into a go value produced the wrong output:\nwant = %#v\ngot = %#v", test.input, newValue.Elem())
			}

			// Any column not listed in test.values must be empty: clear the
			// checked ones and flag whatever remains.
			for columnIndex := range test.values {
				values[columnIndex] = nil
			}

			for columnIndex, unexpected := range values {
				if unexpected != nil {
					t.Errorf("unexpected column index %d found with %d values in it", columnIndex, len(unexpected))
				}
			}
		})
	}
}

// columnsOf groups the values of a row by column index, in column order.
func columnsOf(row parquet.Row) [][]parquet.Value {
	columns := make([][]parquet.Value, 0)
	row.Range(func(_ int, c []parquet.Value) bool {
		columns = append(columns, c)
		return true
	})
	return columns
}

// assertEqualRows reports a test error for every value that differs between
// the two row slices.
func assertEqualRows(t *testing.T, want, got []parquet.Row) {
	if len(want) != len(got) {
		t.Errorf("number of rows mismatch: want=%d got=%d", len(want), len(got))
		return
	}

	for i := range want {
		row1, row2 := want[i], got[i]

		if len(row1) != len(row2) {
			t.Errorf("number of
values in row %d mismatch: want=%d got=%d", i, len(row1), len(row2))
			continue
		}

		for j := range row1 {
			if value1, value2 := row1[j], row2[j]; !parquet.DeepEqual(value1, value2) {
				t.Errorf("values of row %d at index %d mismatch: want=%+v got=%+v", i, j, value1, value2)
			}
		}
	}
}

// assertEqualValues compares the values of one column against the expected
// ones, checking value equality, the column index recorded in each value, and
// the repetition and definition levels.
func assertEqualValues(t *testing.T, columnIndex int, want, got []parquet.Value) {
	n := len(want)

	if len(want) != len(got) {
		t.Errorf("wrong number of values in column %d: want=%d got=%d", columnIndex, len(want), len(got))
		if len(want) > len(got) {
			n = len(got)
		}
	}

	for i := 0; i < n; i++ {
		v1, v2 := want[i], got[i]

		if !parquet.Equal(v1, v2) {
			t.Errorf("values at index %d mismatch in column %d: want=%#v got=%#v", i, columnIndex, v1, v2)
		}

		if columnIndex != int(v2.Column()) {
			// i is the value index within the column, not a column number;
			// the previous message printed it as "in column %d" which made
			// failures misleading to read.
			t.Errorf("column index mismatch at index %d: want=%d got=%#v", i, columnIndex, v2)
		}

		if v1.RepetitionLevel() != v2.RepetitionLevel() {
			t.Errorf("repetition levels at index %d mismatch in column %d: want=%#v got=%#v", i, columnIndex, v1, v2)
		}

		if v1.DefinitionLevel() != v2.DefinitionLevel() {
			t.Errorf("definition levels at index %d mismatch in column %d: want=%#v got=%#v", i, columnIndex, v1, v2)
		}
	}
}

// BenchmarkDeconstruct measures deconstructing a Go value into a parquet row,
// reusing the row buffer across iterations.
func BenchmarkDeconstruct(b *testing.B) {
	row := &AddressBook{
		Owner: "Julien Le Dem",
		OwnerPhoneNumbers: []string{
			"555 123 4567",
			"555 666 1337",
		},
		Contacts: []Contact{
			{
				Name:        "Dmitriy Ryaboy",
				PhoneNumber: "555 987 6543",
			},
			{
				Name: "Chris Aniszczyk",
			},
		},
	}

	schema := parquet.SchemaOf(row)
	buffer := parquet.Row{}

	for i := 0; i < b.N; i++ {
		buffer = schema.Deconstruct(buffer[:0], row)
	}
}

// BenchmarkReconstruct measures reconstructing a Go value from a parquet row,
// resetting the destination on every iteration.
func BenchmarkReconstruct(b *testing.B) {
	row := &AddressBook{
		Owner: "Julien Le Dem",
		OwnerPhoneNumbers: []string{
			"555 123 4567",
			"555 666 1337",
		},
		Contacts: []Contact{
			{
				Name:        "Dmitriy Ryaboy",
				PhoneNumber: "555 987 6543",
			},
			{
				Name: "Chris Aniszczyk",
			},
		},
	}

	schema := parquet.SchemaOf(row)
	values := schema.Deconstruct(nil, row)
	buffer := AddressBook{}

	for i := 0; i < b.N; i++ {
		buffer = AddressBook{}
		if err :=
schema.Reconstruct(&buffer, values); err != nil {
			b.Fatal(err)
		}
	}
}

================================================ FILE: scan.go ================================================
package parquet

import "io"

// ScanRowReader constructs a RowReader which exposes rows from reader until
// the predicate returns false for one of the rows, or EOF is reached.
func ScanRowReader(reader RowReader, predicate func(Row, int64) bool) RowReader {
	return &scanRowReader{reader: reader, predicate: predicate}
}

// scanRowReader adapts an underlying RowReader so reading stops at the first
// row rejected by the predicate; rowIndex == -1 marks that terminal state.
type scanRowReader struct {
	reader    RowReader
	predicate func(Row, int64) bool
	rowIndex  int64
}

// ReadRows fills rows from the underlying reader and applies the predicate to
// each one in order, passing the row's absolute index. When the predicate
// rejects a row, only the rows before it are counted and io.EOF is returned;
// note that rows already read after the rejected one are dropped. Every later
// call returns (0, io.EOF).
func (s *scanRowReader) ReadRows(rows []Row) (int, error) {
	if s.rowIndex < 0 {
		return 0, io.EOF
	}
	n, err := s.reader.ReadRows(rows)
	for i, row := range rows[:n] {
		if !s.predicate(row, s.rowIndex) {
			s.rowIndex = -1
			return i, io.EOF
		}
		s.rowIndex++
	}
	return n, err
}

================================================ FILE: scan_test.go ================================================
package parquet_test

import (
	"testing"

	"github.com/segmentio/parquet-go"
)

// TestScanRowReader checks that scanning stops at the first row whose value
// fails the predicate, keeping only the rows read before it.
func TestScanRowReader(t *testing.T) {
	rows := []parquet.Row{
		{parquet.Int64Value(0)},
		{parquet.Int64Value(1)},
		{parquet.Int64Value(2)},
		{parquet.Int64Value(3)},
		{parquet.Int64Value(4)},
	}

	want := []parquet.Row{
		{parquet.Int64Value(0)},
		{parquet.Int64Value(1)},
		{parquet.Int64Value(2)},
	}

	reader := parquet.ScanRowReader(&bufferedRows{rows: rows},
		func(row parquet.Row, _ int64) bool { return row[0].Int64() < 3 },
	)

	writer := &bufferedRows{}
	_, err := parquet.CopyRows(writer, reader)
	if err != nil {
		t.Fatal(err)
	}

	assertEqualRows(t, want, writer.rows)
}

================================================ FILE: schema.go ================================================
package parquet

import (
	"fmt"
	"math"
	"reflect"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/google/uuid"
	"github.com/segmentio/parquet-go/compress"
	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
)

// Schema represents a parquet
// schema created from a Go value.
//
// Schema implements the Node interface to represent the root node of a parquet
// schema.
type Schema struct {
	name        string
	root        Node
	deconstruct deconstructFunc // generated by makeDeconstructFunc from the root
	reconstruct reconstructFunc // generated by makeReconstructFunc from the root
	mapping     columnMapping   // lookup table from column path to leaf column
	columns     [][]string      // paths of the leaf columns
}

// SchemaOf constructs a parquet schema from a Go value.
//
// The function can construct parquet schemas from struct or pointer-to-struct
// values only. A panic is raised if a Go value of a different type is passed
// to this function.
//
// When creating a parquet Schema from a Go value, the struct fields may contain
// a "parquet" tag to describe properties of the parquet node. The "parquet" tag
// follows the conventional format of Go struct tags: a comma-separated list of
// values describe the options, with the first one defining the name of the
// parquet column.
//
// The following options are also supported in the "parquet" struct tag:
//
//	optional  | make the parquet column optional
//	snappy    | sets the parquet column compression codec to snappy
//	gzip      | sets the parquet column compression codec to gzip
//	brotli    | sets the parquet column compression codec to brotli
//	lz4       | sets the parquet column compression codec to lz4
//	zstd      | sets the parquet column compression codec to zstd
//	plain     | enables the plain encoding (no-op default)
//	dict      | enables dictionary encoding on the parquet column
//	delta     | enables delta encoding on the parquet column
//	list      | for slice types, use the parquet LIST logical type
//	enum      | for string types, use the parquet ENUM logical type
//	uuid      | for string and [16]byte types, use the parquet UUID logical type
//	decimal   | for int32, int64 and [n]byte types, use the parquet DECIMAL logical type
//	date      | for int32 types use the DATE logical type
//	timestamp | for int64 types use the TIMESTAMP logical type with, by default, millisecond precision
//	split     | for float32/float64, use the BYTE_STREAM_SPLIT encoding
//
// # The date logical type is an int32
// value of the number of days since the unix epoch
//
// The timestamp precision can be changed by defining which precision to use as an argument.
// Supported precisions are: nanosecond, millisecond and microsecond. Example:
//
//	type Message struct {
//	  TimestampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"`
//	}
//
// The decimal tag must be followed by two integer parameters, the first integer
// representing the scale and the second the precision; for example:
//
//	type Item struct {
//	  Cost int64 `parquet:"cost,decimal(0:3)"`
//	}
//
// Invalid combination of struct tags and Go types, or repeating options will
// cause the function to panic.
//
// As a special case, if the field tag is "-", the field is omitted from the schema
// and the data will not be written into the parquet file(s).
// Note that a field with name "-" can still be generated using the tag "-,".
//
// The configuration of Parquet maps is done via two tags:
//   - The `parquet-key` tag allows to configure the key of a map.
//   - The `parquet-value` tag allows users to configure a map's values, for example to declare their native Parquet types.
//
// When configuring a Parquet map, the `parquet` tag will configure the map itself.
//
// For example, the following will set the int64 key of the map to be a timestamp:
//
//	type Actions struct {
//	  Action map[int64]string `parquet:"," parquet-key:",timestamp"`
//	}
//
// The schema name is the Go type name of the value.
func SchemaOf(model interface{}) *Schema {
	return schemaOf(dereference(reflect.TypeOf(model)))
}

// cachedSchemas memoizes the schema constructed for each Go type so repeated
// calls to SchemaOf return the same *Schema pointer.
var cachedSchemas sync.Map // map[reflect.Type]*Schema

// schemaOf returns the cached schema for model, constructing and caching one
// on first use; model must be a struct type.
func schemaOf(model reflect.Type) *Schema {
	cached, _ := cachedSchemas.Load(model)
	schema, _ := cached.(*Schema)
	if schema != nil {
		return schema
	}
	if model.Kind() != reflect.Struct {
		panic("cannot construct parquet schema from value of type " + model.String())
	}
	schema = NewSchema(model.Name(), nodeOf(model, nil))
	// If a concurrent caller stored a schema first, adopt it so every caller
	// observes the same pointer.
	if actual, loaded := cachedSchemas.LoadOrStore(model, schema); loaded {
		schema = actual.(*Schema)
	}
	return schema
}

// NewSchema constructs a new Schema object with the given name and root node.
//
// The function panics if Node contains more leaf columns than supported by the
// package (see parquet.MaxColumnIndex).
func NewSchema(name string, root Node) *Schema {
	mapping, columns := columnMappingOf(root)
	return &Schema{
		name:        name,
		root:        root,
		deconstruct: makeDeconstructFunc(root),
		reconstruct: makeReconstructFunc(root),
		mapping:     mapping,
		columns:     columns,
	}
}

// dereference unwraps pointer types until reaching a non-pointer type.
func dereference(t reflect.Type) reflect.Type {
	for t.Kind() == reflect.Ptr {
		t = t.Elem()
	}
	return t
}

// makeDeconstructFunc reuses the deconstruct function of a node that is
// already a *Schema; otherwise it generates one for non-leaf nodes (leaf
// nodes are left with a nil function).
func makeDeconstructFunc(node Node) (deconstruct deconstructFunc) {
	if schema, _ := node.(*Schema); schema != nil {
		return schema.deconstruct
	}
	if !node.Leaf() {
		_, deconstruct = deconstructFuncOf(0, node)
	}
	return deconstruct
}

// makeReconstructFunc mirrors makeDeconstructFunc for the reconstruction
// direction.
func makeReconstructFunc(node Node) (reconstruct reconstructFunc) {
	if schema, _ := node.(*Schema); schema != nil {
		return schema.reconstruct
	}
	if !node.Leaf() {
		_, reconstruct = reconstructFuncOf(0, node)
	}
	return reconstruct
}

// ConfigureRowGroup satisfies the RowGroupOption interface, allowing Schema
// instances to be passed to row group constructors to pre-declare the schema of
// the output parquet file.
func (s *Schema) ConfigureRowGroup(config *RowGroupConfig) { config.Schema = s }

// ConfigureReader satisfies the ReaderOption interface, allowing Schema
// instances to be passed to NewReader to pre-declare the schema of rows
// read from the reader.
func (s *Schema) ConfigureReader(config *ReaderConfig) { config.Schema = s }

// ConfigureWriter satisfies the WriterOption interface, allowing Schema
// instances to be passed to NewWriter to pre-declare the schema of the
// output parquet file.
func (s *Schema) ConfigureWriter(config *WriterConfig) { config.Schema = s }

// String returns a parquet schema representation of s.
func (s *Schema) String() string { return sprint(s.name, s.root) }

// Name returns the name of s.
func (s *Schema) Name() string { return s.name }

// Type returns the parquet type of s.
func (s *Schema) Type() Type { return s.root.Type() }

// Optional returns false since the root node of a parquet schema is always required.
// (Delegates to the root node rather than returning a constant.)
func (s *Schema) Optional() bool { return s.root.Optional() }

// Repeated returns false since the root node of a parquet schema is always required.
// (Delegates to the root node rather than returning a constant.)
func (s *Schema) Repeated() bool { return s.root.Repeated() }

// Required returns true since the root node of a parquet schema is always required.
// (Delegates to the root node rather than returning a constant.)
func (s *Schema) Required() bool { return s.root.Required() }

// Leaf returns true if the root node of the parquet schema is a leaf column.
func (s *Schema) Leaf() bool { return s.root.Leaf() }

// Fields returns the list of fields on the root node of the parquet schema.
func (s *Schema) Fields() []Field { return s.root.Fields() }

// Encoding returns the encoding set on the root node of the parquet schema.
func (s *Schema) Encoding() encoding.Encoding { return s.root.Encoding() }

// Compression returns the compression codec set on the root node of the parquet
// schema.
func (s *Schema) Compression() compress.Codec { return s.root.Compression() }

// GoType returns the Go type that best represents the schema.
func (s *Schema) GoType() reflect.Type { return s.root.GoType() }

// Deconstruct deconstructs a Go value and appends it to a row.
//
// The method panics if the structure of the go value does not match the
// parquet schema.
func (s *Schema) Deconstruct(row Row, value interface{}) Row {
	// One backing array holds the first slot of every column: each column
	// slice starts with len=0 and cap=1 (three-index slicing), so a column
	// holding a single value appends without allocating, while a column that
	// grows past one value reallocates privately and cannot overwrite its
	// neighbors.
	columns := make([][]Value, len(s.columns))
	values := make([]Value, len(s.columns))
	for i := range columns {
		columns[i] = values[i : i : i+1]
	}
	s.deconstructValueToColumns(columns, reflect.ValueOf(value))
	return appendRow(row, columns)
}

// deconstructValueToColumns unwraps pointers and interfaces before invoking
// the generated deconstruct function; a nil pointer or interface is replaced
// by the invalid reflect.Value, which the deconstruct function receives in
// place of a concrete value.
func (s *Schema) deconstructValueToColumns(columns [][]Value, value reflect.Value) {
	for value.Kind() == reflect.Ptr || value.Kind() == reflect.Interface {
		if value.IsNil() {
			value = reflect.Value{}
			break
		}
		value = value.Elem()
	}
	s.deconstruct(columns, levels{}, value)
}

// Reconstruct reconstructs a Go value from a row.
//
// The go value passed as first argument must be a non-nil pointer for the
// row to be decoded into.
//
// The method panics if the structure of the go value and parquet row do not
// match.
func (s *Schema) Reconstruct(value interface{}, row Row) error {
	v := reflect.ValueOf(value)
	if !v.IsValid() {
		// A nil interface carries no dynamic type, so there is no type name
		// to report here; the previous message ended with a dangling
		// "of type " and nothing after it.
		panic("cannot reconstruct row into nil interface value")
	}
	if v.Kind() != reflect.Ptr {
		panic("cannot reconstruct row into go value of non-pointer type " + v.Type().String())
	}
	if v.IsNil() {
		panic("cannot reconstruct row into nil pointer of type " + v.Type().String())
	}
	// Follow the chain of pointers down to the concrete destination value,
	// allocating intermediate pointers as needed.
	for v.Kind() == reflect.Ptr {
		if v.IsNil() {
			v.Set(reflect.New(v.Type().Elem()))
		}
		v = v.Elem()
	}
	// Group the row's values by column index before handing them to the
	// generated reconstruct function.
	columns := make([][]Value, len(s.columns))
	row.Range(func(columnIndex int, columnValues []Value) bool {
		if columnIndex < len(columns) {
			columns[columnIndex] = columnValues
		}
		return true
	})
	return s.reconstruct(v, levels{}, columns)
}

// Lookup returns the leaf column at the given path.
//
// The path is the sequence of column names identifying a leaf column (not
// including the root).
//
// If the path was not found in the mapping, or if it did not represent a
// leaf column of the parquet schema, the boolean will be false.
func (s *Schema) Lookup(path ...string) (LeafColumn, bool) {
	leaf := s.mapping.lookup(path)
	return LeafColumn{
		Node:               leaf.node,
		Path:               leaf.path,
		ColumnIndex:        int(leaf.columnIndex),
		MaxRepetitionLevel: int(leaf.maxRepetitionLevel),
		MaxDefinitionLevel: int(leaf.maxDefinitionLevel),
	}, leaf.node != nil
}

// Columns returns the list of column paths available in the schema.
//
// The method always returns the same slice value across calls to ColumnPaths,
// applications should treat it as immutable.
func (s *Schema) Columns() [][]string { return s.columns }

// Comparator constructs a comparator function which orders rows according to
// the list of sorting columns passed as arguments.
func (s *Schema) Comparator(sortingColumns ...SortingColumn) func(Row, Row) int {
	return compareRowsFuncOf(s, sortingColumns)
}

// forEachNode invokes do for the nodes of the schema via forEachNodeOf,
// starting from the schema itself.
func (s *Schema) forEachNode(do func(name string, node Node)) {
	forEachNodeOf(s.Name(), s, do)
}

// structNode is the group Node implementation backing Go struct types.
type structNode struct {
	gotype reflect.Type
	fields []structField
}

// structNodeOf builds a structNode from the exported fields of t.
func structNodeOf(t reflect.Type) *structNode {
	// Collect struct fields first so we can order them before generating the
	// column indexes.
	fields := structFieldsOf(t)

	s := &structNode{
		gotype: t,
		fields: make([]structField, len(fields)),
	}

	for i := range fields {
		field := structField{name: fields[i].Name, index: fields[i].Index}
		// The three tags configure, in order: the field itself, a map's key,
		// and a map's value.
		field.Node = makeNodeOf(fields[i].Type, fields[i].Name, []string{
			fields[i].Tag.Get("parquet"),
			fields[i].Tag.Get("parquet-key"),
			fields[i].Tag.Get("parquet-value"),
		})
		s.fields[i] = field
	}

	return s
}

// structFieldsOf returns the exported fields of t (flattening anonymous
// embedded structs), renaming fields whose "parquet" tag declares a name.
func structFieldsOf(t reflect.Type) []reflect.StructField {
	fields := appendStructFields(t, nil, nil, 0)

	for i := range fields {
		f := &fields[i]

		if tag := f.Tag.Get("parquet"); tag != "" {
			name, _ := split(tag)
			if name != "" {
				f.Name = name
			}
		}
	}

	return fields
}

// appendStructFields accumulates the exported fields of t into fields,
// recursing into anonymous embedded structs while tracking the index path and
// byte offset of each field relative to the outermost struct.
func appendStructFields(t reflect.Type, fields []reflect.StructField, index []int, offset uintptr) []reflect.StructField {
	for i, n := 0, t.NumField(); i < n; i++ {
		f := t.Field(i)
		if tag := f.Tag.Get("parquet"); tag != "" {
			name, _ := split(tag)
			// Skip fields tagged "-", but keep fields literally named "-"
			// declared with the "-," tag.
			if tag != "-," && name == "-" {
				continue
			}
		}

		// Full-capacity slice so the append below cannot share backing
		// storage with sibling branches of the recursion.
		fieldIndex := index[:len(index):len(index)]
		fieldIndex = append(fieldIndex, i)

		f.Offset += offset

		if f.Anonymous {
			fields = appendStructFields(f.Type, fields, fieldIndex, f.Offset)
		} else if f.IsExported() {
			f.Index = fieldIndex
			fields = append(fields, f)
		}
	}
	return fields
}

// A struct node is always a required group in the parquet schema.
func (s *structNode) Optional() bool { return false }

func (s *structNode) Repeated() bool { return false }

func (s *structNode) Required() bool { return true }

func (s *structNode) Leaf() bool { return false }

func (s *structNode) Encoding() encoding.Encoding { return nil }

func (s *structNode) Compression() compress.Codec { return nil }

func (s *structNode) GoType() reflect.Type { return s.gotype }

func (s *structNode) String() string { return sprint("", s) }

func (s *structNode) Type() Type { return groupType{} }

func (s *structNode) Fields() []Field {
	fields := make([]Field, len(s.fields))
	for i := range s.fields {
		fields[i] = &s.fields[i]
	}
	return fields
}

// fieldByIndex is like reflect.Value.FieldByIndex but returns the zero-value of
// reflect.Value if one
// of the fields was a nil pointer instead of panicking.
func fieldByIndex(v reflect.Value, index []int) reflect.Value {
	for _, i := range index {
		if v = v.Field(i); v.Kind() == reflect.Ptr || v.Kind() == reflect.Interface {
			if v.IsNil() {
				v = reflect.Value{}
				break
			} else {
				v = v.Elem()
			}
		}
	}
	return v
}

// structField couples a schema Node with the name and index path of the Go
// struct field it was generated from.
type structField struct {
	Node
	name  string
	index []int
}

func (f *structField) Name() string { return f.name }

// Value resolves the reflect.Value of this field within base, allocating
// intermediate nil pointers when base is itself a pointer.
func (f *structField) Value(base reflect.Value) reflect.Value {
	switch base.Kind() {
	case reflect.Map:
		// Taking the address of f.name and dereferencing yields an
		// addressable string value used as the map key — presumably to avoid
		// copying the string; NOTE(review): confirm intent.
		return base.MapIndex(reflect.ValueOf(&f.name).Elem())
	case reflect.Ptr:
		if base.IsNil() {
			base.Set(reflect.New(base.Type().Elem()))
		}
		return fieldByIndex(base.Elem(), f.index)
	default:
		if len(f.index) == 1 {
			return base.Field(f.index[0])
		} else {
			return fieldByIndex(base, f.index)
		}
	}
}

// nodeString formats a field for inclusion in panic messages.
func nodeString(t reflect.Type, name string, tag ...string) string {
	return fmt.Sprintf("%s %s %v", name, t.String(), tag)
}

func throwInvalidTag(t reflect.Type, name string, tag string) {
	panic(tag + " is an invalid parquet tag: " + nodeString(t, name, tag))
}

func throwUnknownTag(t reflect.Type, name string, tag string) {
	panic(tag + " is an unrecognized parquet tag: " + nodeString(t, name, tag))
}

func throwInvalidNode(t reflect.Type, msg, name string, tag ...string) {
	panic(msg + ": " + nodeString(t, name, tag...))
}

// FixedLenByteArray decimals are sized based on precision
// this function calculates the necessary byte array size.
func decimalFixedLenByteArraySize(precision int) int { return int(math.Ceil((math.Log10(2) + float64(precision)) / math.Log10(256))) } func forEachStructTagOption(sf reflect.StructField, do func(t reflect.Type, option, args string)) { if tag := sf.Tag.Get("parquet"); tag != "" { _, tag = split(tag) // skip the field name for tag != "" { option := "" args := "" option, tag = split(tag) option, args = splitOptionArgs(option) ft := sf.Type if ft.Kind() == reflect.Ptr { ft = ft.Elem() } do(ft, option, args) } } } func nodeOf(t reflect.Type, tag []string) Node { switch t { case reflect.TypeOf(deprecated.Int96{}): return Leaf(Int96Type) case reflect.TypeOf(uuid.UUID{}): return UUID() case reflect.TypeOf(time.Time{}): return Timestamp(Nanosecond) } var n Node switch t.Kind() { case reflect.Bool: n = Leaf(BooleanType) case reflect.Int, reflect.Int64: n = Int(64) case reflect.Int8, reflect.Int16, reflect.Int32: n = Int(t.Bits()) case reflect.Uint, reflect.Uintptr, reflect.Uint64: n = Uint(64) case reflect.Uint8, reflect.Uint16, reflect.Uint32: n = Uint(t.Bits()) case reflect.Float32: n = Leaf(FloatType) case reflect.Float64: n = Leaf(DoubleType) case reflect.String: n = String() case reflect.Ptr: n = Optional(nodeOf(t.Elem(), nil)) case reflect.Slice: if elem := t.Elem(); elem.Kind() == reflect.Uint8 { // []byte? 
n = Leaf(ByteArrayType) } else { n = Repeated(nodeOf(elem, nil)) } case reflect.Array: if t.Elem().Kind() == reflect.Uint8 { n = Leaf(FixedLenByteArrayType(t.Len())) } case reflect.Map: var mapTag, valueTag, keyTag string if len(tag) > 0 { mapTag = tag[0] if len(tag) > 1 { keyTag = tag[1] } if len(tag) >= 2 { valueTag = tag[2] } } if strings.Contains(mapTag, "json") { n = JSON() } else { n = Map( makeNodeOf(t.Key(), t.Name(), []string{keyTag}), makeNodeOf(t.Elem(), t.Name(), []string{valueTag}), ) } forEachTagOption([]string{mapTag}, func(option, args string) { switch option { case "", "json": return case "optional": n = Optional(n) default: throwUnknownTag(t, "map", option) } }) case reflect.Struct: return structNodeOf(t) } if n == nil { panic("cannot create parquet node from go value of type " + t.String()) } return &goNode{Node: n, gotype: t} } func split(s string) (head, tail string) { if i := strings.IndexByte(s, ','); i < 0 { head = s } else { head, tail = s[:i], s[i+1:] } return } func splitOptionArgs(s string) (option, args string) { if i := strings.IndexByte(s, '('); i >= 0 { option = s[:i] args = s[i:] } else { option = s args = "()" } return } func parseDecimalArgs(args string) (scale, precision int, err error) { if !strings.HasPrefix(args, "(") || !strings.HasSuffix(args, ")") { return 0, 0, fmt.Errorf("malformed decimal args: %s", args) } args = strings.TrimPrefix(args, "(") args = strings.TrimSuffix(args, ")") parts := strings.Split(args, ":") if len(parts) != 2 { return 0, 0, fmt.Errorf("malformed decimal args: (%s)", args) } s, err := strconv.ParseInt(parts[0], 10, 32) if err != nil { return 0, 0, err } p, err := strconv.ParseInt(parts[1], 10, 32) if err != nil { return 0, 0, err } return int(s), int(p), nil } func parseTimestampArgs(args string) (TimeUnit, error) { if !strings.HasPrefix(args, "(") || !strings.HasSuffix(args, ")") { return nil, fmt.Errorf("malformed timestamp args: %s", args) } args = strings.TrimPrefix(args, "(") args = 
strings.TrimSuffix(args, ")")
	if len(args) == 0 {
		return Millisecond, nil
	}
	switch args {
	case "millisecond":
		return Millisecond, nil
	case "microsecond":
		return Microsecond, nil
	case "nanosecond":
		return Nanosecond, nil
	default:
	}
	return nil, fmt.Errorf("unknown time unit: %s", args)
}

// goNode wraps a Node to carry the Go type it was derived from.
type goNode struct {
	Node
	gotype reflect.Type
}

func (n *goNode) GoType() reflect.Type { return n.gotype }

// Compile-time checks that *Schema implements the option interfaces.
var (
	_ RowGroupOption = (*Schema)(nil)
	_ ReaderOption   = (*Schema)(nil)
	_ WriterOption   = (*Schema)(nil)
)

// makeNodeOf builds the parquet node for a struct field of type t, applying
// every option found in its tags, then layering optional/list/encoding/
// compression wrappers around the base node in a fixed order.
func makeNodeOf(t reflect.Type, name string, tag []string) Node {
	var (
		node       Node
		optional   bool
		list       bool
		encoded    encoding.Encoding
		compressed compress.Codec
	)

	// The set* closures reject duplicate declarations of the same kind of
	// option on a single field.
	setNode := func(n Node) {
		if node != nil {
			throwInvalidNode(t, "struct field has multiple logical parquet types declared", name, tag...)
		}
		node = n
	}

	setOptional := func() {
		if optional {
			throwInvalidNode(t, "struct field has multiple declaration of the optional tag", name, tag...)
		}
		optional = true
	}

	setList := func() {
		if list {
			throwInvalidNode(t, "struct field has multiple declaration of the list tag", name, tag...)
		}
		list = true
	}

	setEncoding := func(e encoding.Encoding) {
		if encoded != nil {
			// NOTE(review): message typo "multiple time" kept as-is; it is a
			// runtime string.
			throwInvalidNode(t, "struct field has encoding declared multiple time", name, tag...)
		}
		encoded = e
	}

	setCompression := func(c compress.Codec) {
		if compressed != nil {
			throwInvalidNode(t, "struct field has compression codecs declared multiple times", name, tag...)
		}
		compressed = c
	}

	forEachTagOption(tag, func(option, args string) {
		// Maps are configured entirely by nodeOf, which consumes all three
		// tags at once.
		if t.Kind() == reflect.Map {
			node = nodeOf(t, tag)
			return
		}
		switch option {
		case "":
			return
		case "optional":
			setOptional()

		case "snappy":
			setCompression(&Snappy)

		case "gzip":
			setCompression(&Gzip)

		case "brotli":
			setCompression(&Brotli)

		case "lz4":
			setCompression(&Lz4Raw)

		case "zstd":
			setCompression(&Zstd)

		case "uncompressed":
			setCompression(&Uncompressed)

		case "plain":
			setEncoding(&Plain)

		case "dict":
			setEncoding(&RLEDictionary)

		case "json":
			setNode(JSON())

		case "delta":
			switch t.Kind() {
			case reflect.Int, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint32, reflect.Uint64:
				setEncoding(&DeltaBinaryPacked)
			case reflect.String:
				setEncoding(&DeltaByteArray)
			case reflect.Slice:
				if t.Elem().Kind() == reflect.Uint8 { // []byte?
					setEncoding(&DeltaByteArray)
				} else {
					throwInvalidTag(t, name, option)
				}
			case reflect.Array:
				if t.Elem().Kind() == reflect.Uint8 { // [N]byte?
					setEncoding(&DeltaByteArray)
				} else {
					throwInvalidTag(t, name, option)
				}
			default:
				throwInvalidTag(t, name, option)
			}

		case "split":
			switch t.Kind() {
			case reflect.Float32, reflect.Float64:
				setEncoding(&ByteStreamSplit)
			default:
				throwInvalidTag(t, name, option)
			}

		case "list":
			switch t.Kind() {
			case reflect.Slice:
				element := nodeOf(t.Elem(), nil)
				setNode(element)
				setList()
			default:
				throwInvalidTag(t, name, option)
			}

		case "enum":
			switch t.Kind() {
			case reflect.String:
				setNode(Enum())
			default:
				throwInvalidTag(t, name, option)
			}

		case "uuid":
			switch t.Kind() {
			case reflect.Array:
				// Only validates the shape; [16]byte is already mapped to
				// the UUID node by nodeOf.
				if t.Elem().Kind() != reflect.Uint8 || t.Len() != 16 {
					throwInvalidTag(t, name, option)
				}
			default:
				throwInvalidTag(t, name, option)
			}

		case "decimal":
			scale, precision, err := parseDecimalArgs(args)
			if err != nil {
				throwInvalidTag(t, name, option+args)
			}
			var baseType Type
			switch t.Kind() {
			case reflect.Int32:
				baseType = Int32Type
			case reflect.Int64:
				baseType = Int64Type
			case reflect.Array, reflect.Slice:
				baseType = FixedLenByteArrayType(decimalFixedLenByteArraySize(precision))
			default:
				throwInvalidTag(t, name, option)
			}

			setNode(Decimal(scale, precision, baseType))

		case "date":
			switch t.Kind() {
			case reflect.Int32:
				setNode(Date())
			default:
				throwInvalidTag(t, name, option)
			}

		case "timestamp":
			switch t.Kind() {
			case reflect.Int64:
				timeUnit, err := parseTimestampArgs(args)
				if err != nil {
					throwInvalidTag(t, name, option)
				}
				setNode(Timestamp(timeUnit))
			default:
				switch t {
				case reflect.TypeOf(time.Time{}):
					timeUnit, err := parseTimestampArgs(args)
					if err != nil {
						throwInvalidTag(t, name, option)
					}
					setNode(Timestamp(timeUnit))
				default:
					throwInvalidTag(t, name, option)
				}
			}

		default:
			throwUnknownTag(t, name, option)
		}
	})

	// Special case: an "optional" struct tag on a slice applies to the
	// individual items, not the overall list. The least messy way to
	// deal with this is at this level, instead of passing down optional
	// information into the nodeOf function, and then passing back whether an
	// optional tag was applied.
	if node == nil && t.Kind() == reflect.Slice {
		isUint8 := t.Elem().Kind() == reflect.Uint8
		// Note for strings "optional" applies only to the entire BYTE_ARRAY and
		// not each individual byte.
		if optional && !isUint8 {
			node = Repeated(Optional(nodeOf(t.Elem(), tag)))
			// Don't also apply "optional" to the whole list.
			optional = false
		}
	}

	if node == nil {
		node = nodeOf(t, tag)
	}

	if compressed != nil {
		node = Compressed(node, compressed)
	}

	if encoded != nil {
		node = Encoded(node, encoded)
	}

	if list {
		node = List(node)
	}

	// Nested slices without a list tag have no representable schema.
	if node.Repeated() && !list {
		elemKind := node.GoType().Elem().Kind()
		if elemKind == reflect.Slice {
			panic("unhandled nested slice on parquet schema without list tag")
		}
	}

	if optional {
		node = Optional(node)
	}

	return node
}

// forEachTagOption invokes do for every option of every tag, skipping each
// tag's leading field name.
func forEachTagOption(tags []string, do func(option, args string)) {
	for _, tag := range tags {
		_, tag = split(tag) // skip the field name
		for tag != "" {
			option := ""
			option, tag = split(tag)
			var args string
			option, args = splitOptionArgs(option)
			do(option, args)
		}
	}
}

================================================ FILE: schema_test.go ================================================
package parquet_test

import (
	"testing"

	"github.com/segmentio/parquet-go"
)

// TestSchemaOf checks the printed parquet schema generated for a collection
// of Go struct shapes and tags.
func TestSchemaOf(t *testing.T) {
	tests := []struct {
		value interface{}
		print string
	}{
		{
			value: new(struct{ Name string }),
			print: `message { required binary Name (STRING); }`,
		},
		{
			value: new(struct {
				X int
				Y int
			}),
			print: `message { required int64 X (INT(64,true)); required int64 Y (INT(64,true)); }`,
		},
		{
			value: new(struct {
				X float32
				Y float32
			}),
			print: `message { required float X; required float Y; }`,
		},
		{
			value: new(struct {
				Inner struct {
					FirstName string `parquet:"first_name"`
					LastName  string `parquet:"last_name"`
				} `parquet:"inner,optional"`
			}),
			print: `message { optional group inner { required binary first_name (STRING); required binary last_name (STRING); } }`,
		},
		{
			value: new(struct {
				Short float32 `parquet:"short,split"`
				Long  float64 `parquet:"long,split"`
			}),
			print: `message { required float short; required double long; }`,
		},
		{
			value: new(struct {
				Inner struct {
					FirstName          string `parquet:"first_name"`
					ShouldNotBePresent string `parquet:"-"`
				} `parquet:"inner,optional"`
			}),
			print: `message { optional group inner { required binary first_name (STRING); } }`,
		},
		{
			value:
new(struct {
				Inner struct {
					FirstName    string `parquet:"first_name"`
					MyNameIsDash string `parquet:"-,"`
				} `parquet:"inner,optional"`
			}),
			print: `message { optional group inner { required binary first_name (STRING); required binary - (STRING); } }`,
		},
		{
			value: new(struct {
				Inner struct {
					TimestampMillis int64 `parquet:"timestamp_millis,timestamp"`
					TimestampMicros int64 `parquet:"timestamp_micros,timestamp(microsecond)"`
				} `parquet:"inner,optional"`
			}),
			print: `message { optional group inner { required int64 timestamp_millis (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)); required int64 timestamp_micros (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS)); } }`,
		},
		{
			value: new(struct {
				Name string `parquet:",json"`
			}),
			print: `message { required binary Name (JSON); }`,
		},
		{
			value: new(struct {
				A map[int64]string `parquet:"," parquet-key:",timestamp"`
				B map[int64]string
			}),
			print: `message { required group A (MAP) { repeated group key_value { required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MILLIS)); required binary value (STRING); } } required group B (MAP) { repeated group key_value { required int64 key (INT(64,true)); required binary value (STRING); } } }`,
		},
		{
			value: new(struct {
				A map[int64]string `parquet:",optional" parquet-value:",json"`
			}),
			print: `message { optional group A (MAP) { repeated group key_value { required int64 key (INT(64,true)); required binary value (JSON); } } }`,
		},
		{
			value: new(struct {
				A map[int64]string `parquet:",optional"`
			}),
			print: `message { optional group A (MAP) { repeated group key_value { required int64 key (INT(64,true)); required binary value (STRING); } } }`,
		},
		{
			value: new(struct {
				A map[int64]string `parquet:",optional" parquet-value:",json" parquet-key:",timestamp(microsecond)"`
			}),
			print: `message { optional group A (MAP) { repeated group key_value { required int64 key (TIMESTAMP(isAdjustedToUTC=true,unit=MICROS)); required binary value (JSON); } } }`,
		},
	}

	for _, test := range tests {
		t.Run("", func(t *testing.T) {
			schema := parquet.SchemaOf(test.value)
			if s := schema.String(); s != test.print {
				t.Errorf("\nexpected:\n\n%s\n\nfound:\n\n%s\n", test.print, s)
			}
		})
	}
}

================================================ FILE: search.go ================================================
package parquet

// Search is like Find, but uses the default ordering of the given type. Search
// and Find are scoped to a given ColumnChunk and find the pages within a
// ColumnChunk which might contain the result. See Find for more details.
func Search(index ColumnIndex, value Value, typ Type) int {
	return Find(index, value, CompareNullsLast(typ.Compare))
}

// Find uses the ColumnIndex passed as argument to find the page in a column
// chunk (determined by the given ColumnIndex) that the given value is expected
// to be found in.
//
// The function returns the index of the first page that might contain the
// value. If the function determines that the value does not exist in the
// index, NumPages is returned.
//
// If you want to search the entire parquet file, you must iterate over the
// RowGroups and search each one individually, if there are multiple in the
// file. If you call writer.Flush before closing the file, then you will have
// multiple RowGroups to iterate over, otherwise Flush is called once on Close.
//
// The comparison function passed as last argument is used to determine the
// relative order of values.
// This should generally be the Compare method of
// the column type, but can sometimes be customized to modify how null values
// are interpreted, for example:
//
//	pageIndex := parquet.Find(columnIndex, value,
//		parquet.CompareNullsFirst(typ.Compare),
//	)
func Find(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
	switch {
	// Binary search is only valid when pages are in ascending order.
	case index.IsAscending():
		return binarySearch(index, value, cmp)
	default:
		return linearSearch(index, value, cmp)
	}
}

// binarySearch locates the first page whose [min,max] range may contain value,
// assuming pages are sorted in ascending order. Returns NumPages when the
// value cannot be in any page.
func binarySearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
	n := index.NumPages()
	curIdx := 0
	topIdx := n

	// while there's at least one more page to check
	for (topIdx - curIdx) > 1 {

		// nextIdx is set to halfway between curIdx and topIdx
		nextIdx := ((topIdx - curIdx) / 2) + curIdx

		smallerThanMin := cmp(value, index.MinValue(nextIdx))
		switch {
		// search below pages[nextIdx]
		case smallerThanMin < 0:
			topIdx = nextIdx
		// search pages[nextIdx] and above
		case smallerThanMin > 0:
			curIdx = nextIdx
		case smallerThanMin == 0:
			// this case is hit when value == min value of nextIdx.
			// we must check below this index to find if there's
			// another page before this.
			// e.g. searching for the first page 3 appears in:
			//   [1,2,3]
			//   [3,4,5]
			//   [6,7,8]
			// if the page preceding this one has a maxValue matching the value
			// we're searching, continue the search; otherwise, we can return
			// early.
			//
			// cases covered by the else block:
			//   cmp(value, index.MaxValue(nextIdx-1)) < 0: the value is only in this page
			//   cmp(value, index.MaxValue(nextIdx-1)) > 0: we've got a sorting problem with overlapping pages
			//
			// bounds check not needed for nextIdx-1 because nextIdx is
			// guaranteed to be at least curIdx + 1 by the loop condition and
			// the midpoint computation above.
			if cmp(value, index.MaxValue(nextIdx-1)) == 0 {
				topIdx = nextIdx
			} else {
				return nextIdx
			}
		}
	}

	// last page check, if it wasn't explicitly found above
	if curIdx < n {
		// check pages[curIdx] for value
		min := index.MinValue(curIdx)
		max := index.MaxValue(curIdx)

		// if value is not in pages[curIdx], then it's not in this columnChunk
		if cmp(value, min) < 0 || cmp(value, max) > 0 {
			curIdx = n
		}
	}

	return curIdx
}

// linearSearch scans every page range in order; used when pages are not
// sorted ascending. Returns NumPages when no page range contains value.
func linearSearch(index ColumnIndex, value Value, cmp func(Value, Value) int) int {
	n := index.NumPages()

	for i := 0; i < n; i++ {
		min := index.MinValue(i)
		max := index.MaxValue(i)

		if cmp(min, value) <= 0 && cmp(value, max) <= 0 {
			return i
		}
	}

	return n
}

================================================ FILE: search_test.go ================================================
package parquet_test

import (
	"testing"

	"github.com/segmentio/parquet-go"
)

// TestSearchBinary exercises Search over ascending page ranges (binary path).
func TestSearchBinary(t *testing.T) {
	testSearch(t, [][]int32{
		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
		{10, 10, 10, 10},
		{21, 22, 24, 25, 30},
		{30, 30},
		{30, 31},
		{32},
		{42, 43, 44, 45, 46, 47, 48, 49},
	}, [][]int{
		{10, 1},
		{0, 0},
		{9, 0},
		// non-existant, but would be in this page
		{23, 2},
		// ensure we find the first page
		{30, 2},
		{31, 4},
		// out of bounds
		{99, 7},
		// out of bounds
		{-1, 7},
	})
}

// TestSearchLinear exercises Search over unsorted page ranges (linear path).
func TestSearchLinear(t *testing.T) {
	testSearch(t, [][]int32{
		{10, 10, 10, 10},
		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
		{21, 22, 23, 24, 25},
		{19, 18, 17, 16, 14, 13, 12, 11},
		{42, 43, 44, 45, 46, 47, 48, 49},
	}, [][]int{
		{10, 0},
		{0, 1},
		{9, 1},
		{48, 4},
		// non-existant, but could be in this page
		{15, 3},
		// out of bounds
		{99, 5},
		// out of bounds
		{-1, 5},
	})
}

// testSearch builds a column index from the given pages and asserts that
// Search returns the expected page index for each [value, wantIndex] pair.
func testSearch(t
*testing.T, pages [][]int32, expectIndex [][]int) {
	indexer := parquet.Int32Type.NewColumnIndexer(0)

	// Record each page's min/max into the indexer.
	for _, values := range pages {
		min := values[0]
		max := values[0]

		for _, v := range values[1:] {
			switch {
			case v < min:
				min = v
			case v > max:
				max = v
			}
		}

		indexer.IndexPage(int64(len(values)), 0,
			parquet.ValueOf(min),
			parquet.ValueOf(max),
		)
	}

	formatIndex := indexer.ColumnIndex()
	columnIndex := parquet.NewColumnIndex(parquet.Int32, &formatIndex)

	for _, values := range expectIndex {
		v := parquet.ValueOf(values[0])
		j := parquet.Search(columnIndex, v, parquet.Int32Type)

		if values[1] != j {
			t.Errorf("searching for value %v: got=%d want=%d", v, j, values[1])
		}
	}
}

================================================ FILE: sorting.go ================================================
//go:build go1.18

package parquet

import (
	"io"
	"sort"
)

// SortingWriter is a type similar to GenericWriter but it ensures that rows
// are sorted according to the sorting columns configured on the writer.
//
// The writer accumulates rows in an in-memory buffer which is sorted when it
// reaches the target number of rows, then written to a temporary row group.
// When the writer is flushed or closed, the temporary row groups are merged
// into a row group in the output file, ensuring that rows remain sorted in the
// final row group.
//
// Because row groups get encoded and compressed, they hold a lot less memory
// than if all rows were retained in memory. Sorting then merging rows chunks
// also tends to be a lot more efficient than sorting all rows in memory as it
// results in better CPU cache utilization since sorting multi-megabyte arrays
// causes a lot of cache misses since the data set cannot be held in CPU caches.
type SortingWriter[T any] struct {
	rowbuf  *RowBuffer[T]      // in-memory buffer of rows awaiting sorting
	writer  *GenericWriter[T]  // writes sorted chunks to the temporary buffer
	output  *GenericWriter[T]  // writes the merged result to the destination
	buffer  io.ReadWriteSeeker // temporary storage for sorted row groups
	maxRows int64              // target number of rows sorted in memory at once
	numRows int64              // rows written to the temporary buffer so far
	sorting SortingConfig
	dedupe  dedupe
}

// NewSortingWriter constructs a new sorting writer which writes a parquet file
// where rows of each row group are ordered according to the sorting columns
// configured on the writer.
//
// The sortRowCount argument defines the target number of rows that will be
// sorted in memory before being written to temporary row groups. The greater
// this value the more memory is needed to buffer rows in memory. Choosing a
// value that is too small limits the maximum number of rows that can exist in
// the output file since the writer cannot create more than 32K temporary row
// groups to hold the sorted row chunks.
func NewSortingWriter[T any](output io.Writer, sortRowCount int64, options ...WriterOption) *SortingWriter[T] {
	config, err := NewWriterConfig(options...)
	if err != nil {
		panic(err)
	}
	return &SortingWriter[T]{
		rowbuf: NewRowBuffer[T](&RowGroupConfig{
			Schema:  config.Schema,
			Sorting: config.Sorting,
		}),
		// The internal writer is retargeted at the temporary buffer on first
		// use; io.Discard is just a placeholder.
		writer: NewGenericWriter[T](io.Discard, &WriterConfig{
			CreatedBy:            config.CreatedBy,
			ColumnPageBuffers:    config.ColumnPageBuffers,
			ColumnIndexSizeLimit: config.ColumnIndexSizeLimit,
			PageBufferSize:       config.PageBufferSize,
			WriteBufferSize:      config.WriteBufferSize,
			DataPageVersion:      config.DataPageVersion,
			Schema:               config.Schema,
			Compression:          config.Compression,
			Sorting:              config.Sorting,
		}),
		output:  NewGenericWriter[T](output, config),
		maxRows: sortRowCount,
		sorting: config.Sorting,
	}
}

// Close flushes any buffered rows and closes the underlying output writer.
func (w *SortingWriter[T]) Close() error {
	if err := w.Flush(); err != nil {
		return err
	}
	return w.output.Close()
}

// Flush sorts and writes any buffered rows to the temporary buffer, then
// merges all temporary row groups into a single sorted row group on the
// output writer.
func (w *SortingWriter[T]) Flush() error {
	defer w.resetSortingBuffer()

	if err := w.sortAndWriteBufferedRows(); err != nil {
		return err
	}

	if w.numRows == 0 {
		return nil
	}

	if err := w.writer.Close(); err != nil {
		return err
	}

	size, err := w.buffer.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	// Re-open the temporary buffer as a parquet file; indexes and bloom
	// filters are skipped since we only need to stream the row groups back.
	f, err := OpenFile(newReaderAt(w.buffer), size,
		&FileConfig{
			SkipPageIndex:    true,
			SkipBloomFilters: true,
			ReadBufferSize:   defaultReadBufferSize,
		},
	)
	if err != nil {
		return err
	}

	m, err := MergeRowGroups(f.RowGroups(),
		&RowGroupConfig{
			Schema:  w.Schema(),
			Sorting: w.sorting,
		},
	)
	if err != nil {
		return err
	}

	rows := m.Rows()
	defer rows.Close()

	reader := RowReader(rows)
	if w.sorting.DropDuplicatedRows {
		reader = DedupeRowReader(rows, w.rowbuf.compare)
	}

	if _, err := CopyRows(w.output, reader); err != nil {
		return err
	}

	return w.output.Flush()
}

// Reset makes the writer ready to produce a new file to output.
func (w *SortingWriter[T]) Reset(output io.Writer) {
	w.output.Reset(output)
	w.rowbuf.Reset()
	w.resetSortingBuffer()
}

// resetSortingBuffer releases the temporary buffer back to the configured
// buffer pool and resets the row counter.
func (w *SortingWriter[T]) resetSortingBuffer() {
	w.writer.Reset(io.Discard)
	w.numRows = 0

	if w.buffer != nil {
		w.sorting.SortingBuffers.PutBuffer(w.buffer)
		w.buffer = nil
	}
}

func (w *SortingWriter[T]) Write(rows []T) (int, error) {
	return w.writeRows(len(rows), func(i, j int) (int, error) { return w.rowbuf.Write(rows[i:j]) })
}

func (w *SortingWriter[T]) WriteRows(rows []Row) (int, error) {
	return w.writeRows(len(rows), func(i, j int) (int, error) { return w.rowbuf.WriteRows(rows[i:j]) })
}

// writeRows feeds rows into the in-memory buffer in chunks, sorting and
// spilling the buffer to the temporary file whenever it reaches maxRows.
func (w *SortingWriter[T]) writeRows(numRows int, writeRows func(i, j int) (int, error)) (int, error) {
	wn := 0

	for wn < numRows {
		if w.rowbuf.NumRows() >= w.maxRows {
			if err := w.sortAndWriteBufferedRows(); err != nil {
				return wn, err
			}
		}

		// Write at most the remaining capacity of the row buffer.
		n := int(w.maxRows - w.rowbuf.NumRows())
		n += wn
		if n > numRows {
			n = numRows
		}

		n, err := writeRows(wn, n)
		wn += n

		if err != nil {
			return wn, err
		}
	}

	return wn, nil
}

func (w *SortingWriter[T]) SetKeyValueMetadata(key, value string) {
	w.output.SetKeyValueMetadata(key, value)
}

func (w *SortingWriter[T]) Schema() *Schema { return w.output.Schema() }

// sortAndWriteBufferedRows sorts the in-memory row buffer (optionally
// deduplicating) and appends it as a row group to the temporary buffer.
func (w *SortingWriter[T]) sortAndWriteBufferedRows() error {
	if w.rowbuf.Len() == 0 {
		return nil
	}

	defer w.rowbuf.Reset()
	sort.Sort(w.rowbuf)

	if w.sorting.DropDuplicatedRows {
		w.rowbuf.rows = w.rowbuf.rows[:w.dedupe.deduplicate(w.rowbuf.rows, w.rowbuf.compare)]
		defer w.dedupe.reset()
	}

	rows := w.rowbuf.Rows()
	defer rows.Close()

	// Lazily acquire the temporary buffer on first spill.
	if w.buffer == nil {
		w.buffer = w.sorting.SortingBuffers.GetBuffer()
		w.writer.Reset(w.buffer)
	}

	n, err := CopyRows(w.writer, rows)
	if err != nil {
		return err
	}

	if err := w.writer.Flush(); err != nil {
		return err
	}

	w.numRows += n
	return nil
}

================================================ FILE: sorting_test.go ================================================
//go:build go1.18

package parquet_test

import (
	"bytes"
	"math/rand"
	"sort"
	"testing"

	"github.com/segmentio/parquet-go"
)

// TestSortingWriter shuffles 1000 rows, writes them through a SortingWriter
// with a small sort buffer, and verifies the output file is fully sorted.
func TestSortingWriter(t *testing.T) {
	type Row struct {
		Value int32 `parquet:"value"`
	}

	rows := make([]Row, 1000)
	for i := range rows {
		rows[i].Value = int32(i)
	}

	prng := rand.New(rand.NewSource(0))
	prng.Shuffle(len(rows), func(i, j int) {
		rows[i], rows[j] = rows[j], rows[i]
	})

	buffer := bytes.NewBuffer(nil)
	writer := parquet.NewSortingWriter[Row](buffer, 99,
		parquet.SortingWriterConfig(
			parquet.SortingColumns(
				parquet.Ascending("value"),
			),
		),
	)

	_, err := writer.Write(rows)
	if err != nil {
		t.Fatal(err)
	}

	if err := writer.Close(); err != nil {
		t.Fatal(err)
	}

	read, err := parquet.Read[Row](bytes.NewReader(buffer.Bytes()), int64(buffer.Len()))
	if err != nil {
		t.Fatal(err)
	}

	sort.Slice(rows, func(i, j int) bool { return rows[i].Value < rows[j].Value })
	assertRowsEqual(t, rows, read)
}

// TestSortingWriterDropDuplicatedRows writes each value twice and verifies
// that DropDuplicatedRows leaves a single sorted copy of each in the output.
func TestSortingWriterDropDuplicatedRows(t *testing.T) {
	type Row struct {
		Value int32 `parquet:"value"`
	}

	rows := make([]Row, 1000)
	for i := range rows {
		rows[i].Value = int32(i / 2)
	}

	prng := rand.New(rand.NewSource(0))
	prng.Shuffle(len(rows), func(i, j int) {
		rows[i], rows[j] = rows[j], rows[i]
	})

	buffer := bytes.NewBuffer(nil)
	writer := parquet.NewSortingWriter[Row](buffer, 99,
		parquet.SortingWriterConfig(
			parquet.SortingBuffers(
				parquet.NewFileBufferPool("", "buffers.*"),
			),
			parquet.SortingColumns(
				parquet.Ascending("value"),
			),
			parquet.DropDuplicatedRows(true),
		),
	)

	_,
err := writer.Write(rows) if err != nil { t.Fatal(err) } if err := writer.Close(); err != nil { t.Fatal(err) } read, err := parquet.Read[Row](bytes.NewReader(buffer.Bytes()), int64(buffer.Len())) if err != nil { t.Fatal(err) } sort.Slice(rows, func(i, j int) bool { return rows[i].Value < rows[j].Value }) n := len(rows) / 2 for i := range rows[:n] { rows[i] = rows[2*i] } assertRowsEqual(t, rows[:n], read) } ================================================ FILE: sparse/array.go ================================================ package sparse import ( "time" "unsafe" ) type Array struct{ array } func UnsafeArray(base unsafe.Pointer, length int, offset uintptr) Array { return Array{makeArray(base, uintptr(length), offset)} } func (a Array) Len() int { return int(a.len) } func (a Array) Index(i int) unsafe.Pointer { return a.index(i) } func (a Array) Slice(i, j int) Array { return Array{a.slice(i, j)} } func (a Array) Offset(off uintptr) Array { return Array{a.offset(off)} } func (a Array) BoolArray() BoolArray { return BoolArray{a.array} } func (a Array) Int8Array() Int8Array { return Int8Array{a.array} } func (a Array) Int16Array() Int16Array { return Int16Array{a.array} } func (a Array) Int32Array() Int32Array { return Int32Array{a.array} } func (a Array) Int64Array() Int64Array { return Int64Array{a.array} } func (a Array) Float32Array() Float32Array { return Float32Array{a.array} } func (a Array) Float64Array() Float64Array { return Float64Array{a.array} } func (a Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Array) Uint32Array() Uint32Array { return Uint32Array{a.array} } func (a Array) Uint64Array() Uint64Array { return Uint64Array{a.array} } func (a Array) Uint128Array() Uint128Array { return Uint128Array{a.array} } func (a Array) StringArray() StringArray { return StringArray{a.array} } func (a Array) TimeArray() TimeArray { return TimeArray{a.array} } type array 
struct { ptr unsafe.Pointer len uintptr off uintptr } func makeArray(base unsafe.Pointer, length, offset uintptr) array { return array{ptr: base, len: length, off: offset} } func (a array) index(i int) unsafe.Pointer { if uintptr(i) >= a.len { panic("index out of bounds") } return unsafe.Add(a.ptr, a.off*uintptr(i)) } func (a array) slice(i, j int) array { if uintptr(i) > a.len || uintptr(j) > a.len || i > j { panic("slice index out of bounds") } return array{ ptr: unsafe.Add(a.ptr, a.off*uintptr(i)), len: uintptr(j - i), off: a.off, } } func (a array) offset(off uintptr) array { if a.ptr == nil { panic("offset of nil array") } return array{ ptr: unsafe.Add(a.ptr, off), len: a.len, off: a.off, } } type BoolArray struct{ array } func MakeBoolArray(values []bool) BoolArray { return BoolArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 1)} } func UnsafeBoolArray(base unsafe.Pointer, length int, offset uintptr) BoolArray { return BoolArray{makeArray(base, uintptr(length), offset)} } func (a BoolArray) Len() int { return int(a.len) } func (a BoolArray) Index(i int) bool { return *(*byte)(a.index(i)) != 0 } func (a BoolArray) Slice(i, j int) BoolArray { return BoolArray{a.slice(i, j)} } func (a BoolArray) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a BoolArray) UnsafeArray() Array { return Array{a.array} } type Int8Array struct{ array } func MakeInt8Array(values []int8) Int8Array { return Int8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} } func UnsafeInt8Array(base unsafe.Pointer, length int, offset uintptr) Int8Array { return Int8Array{makeArray(base, uintptr(length), offset)} } func (a Int8Array) Len() int { return int(a.len) } func (a Int8Array) Index(i int) int8 { return *(*int8)(a.index(i)) } func (a Int8Array) Slice(i, j int) Int8Array { return Int8Array{a.slice(i, j)} } func (a Int8Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Int8Array) UnsafeArray() 
Array { return Array{a.array} } type Int16Array struct{ array } func MakeInt16Array(values []int16) Int16Array { return Int16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} } func UnsafeInt16Array(base unsafe.Pointer, length int, offset uintptr) Int16Array { return Int16Array{makeArray(base, uintptr(length), offset)} } func (a Int16Array) Len() int { return int(a.len) } func (a Int16Array) Index(i int) int16 { return *(*int16)(a.index(i)) } func (a Int16Array) Slice(i, j int) Int16Array { return Int16Array{a.slice(i, j)} } func (a Int16Array) Int8Array() Int8Array { return Int8Array{a.array} } func (a Int16Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Int16Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Int16Array) UnsafeArray() Array { return Array{a.array} } type Int32Array struct{ array } func MakeInt32Array(values []int32) Int32Array { return Int32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} } func UnsafeInt32Array(base unsafe.Pointer, length int, offset uintptr) Int32Array { return Int32Array{makeArray(base, uintptr(length), offset)} } func (a Int32Array) Len() int { return int(a.len) } func (a Int32Array) Index(i int) int32 { return *(*int32)(a.index(i)) } func (a Int32Array) Slice(i, j int) Int32Array { return Int32Array{a.slice(i, j)} } func (a Int32Array) Int8Array() Int8Array { return Int8Array{a.array} } func (a Int32Array) Int16Array() Int16Array { return Int16Array{a.array} } func (a Int32Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Int32Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Int32Array) Uint32Array() Uint32Array { return Uint32Array{a.array} } func (a Int32Array) UnsafeArray() Array { return Array{a.array} } type Int64Array struct{ array } func MakeInt64Array(values []int64) Int64Array { return Int64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), 
uintptr(len(values)), 8)} } func UnsafeInt64Array(base unsafe.Pointer, length int, offset uintptr) Int64Array { return Int64Array{makeArray(base, uintptr(length), offset)} } func (a Int64Array) Len() int { return int(a.len) } func (a Int64Array) Index(i int) int64 { return *(*int64)(a.index(i)) } func (a Int64Array) Slice(i, j int) Int64Array { return Int64Array{a.slice(i, j)} } func (a Int64Array) Int8Array() Int8Array { return Int8Array{a.array} } func (a Int64Array) Int16Array() Int16Array { return Int16Array{a.array} } func (a Int64Array) Int32Array() Int32Array { return Int32Array{a.array} } func (a Int64Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Int64Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Int64Array) Uint32Array() Uint32Array { return Uint32Array{a.array} } func (a Int64Array) Uint64Array() Uint64Array { return Uint64Array{a.array} } func (a Int64Array) UnsafeArray() Array { return Array{a.array} } type Float32Array struct{ array } func MakeFloat32Array(values []float32) Float32Array { return Float32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} } func UnsafeFloat32Array(base unsafe.Pointer, length int, offset uintptr) Float32Array { return Float32Array{makeArray(base, uintptr(length), offset)} } func (a Float32Array) Len() int { return int(a.len) } func (a Float32Array) Index(i int) float32 { return *(*float32)(a.index(i)) } func (a Float32Array) Slice(i, j int) Float32Array { return Float32Array{a.slice(i, j)} } func (a Float32Array) Array() Array { return Array{a.array} } func (a Float32Array) Uint32Array() Uint32Array { return Uint32Array{a.array} } func (a Float32Array) UnsafeArray() Array { return Array{a.array} } type Float64Array struct{ array } func MakeFloat64Array(values []float64) Float64Array { return Float64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} } func UnsafeFloat64Array(base unsafe.Pointer, length 
int, offset uintptr) Float64Array { return Float64Array{makeArray(base, uintptr(length), offset)} } func (a Float64Array) Len() int { return int(a.len) } func (a Float64Array) Index(i int) float64 { return *(*float64)(a.index(i)) } func (a Float64Array) Slice(i, j int) Float64Array { return Float64Array{a.slice(i, j)} } func (a Float64Array) Uint64Array() Uint64Array { return Uint64Array{a.array} } func (a Float64Array) UnsafeArray() Array { return Array{a.array} } type Uint8Array struct{ array } func MakeUint8Array(values []uint8) Uint8Array { return Uint8Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} } func UnsafeUint8Array(base unsafe.Pointer, length int, offset uintptr) Uint8Array { return Uint8Array{makeArray(base, uintptr(length), offset)} } func (a Uint8Array) Len() int { return int(a.len) } func (a Uint8Array) Index(i int) uint8 { return *(*uint8)(a.index(i)) } func (a Uint8Array) Slice(i, j int) Uint8Array { return Uint8Array{a.slice(i, j)} } func (a Uint8Array) UnsafeArray() Array { return Array{a.array} } type Uint16Array struct{ array } func MakeUint16Array(values []uint16) Uint16Array { return Uint16Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} } func UnsafeUint16Array(base unsafe.Pointer, length int, offset uintptr) Uint16Array { return Uint16Array{makeArray(base, uintptr(length), offset)} } func (a Uint16Array) Len() int { return int(a.len) } func (a Uint16Array) Index(i int) uint16 { return *(*uint16)(a.index(i)) } func (a Uint16Array) Slice(i, j int) Uint16Array { return Uint16Array{a.slice(i, j)} } func (a Uint16Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Uint16Array) UnsafeArray() Array { return Array{a.array} } type Uint32Array struct{ array } func MakeUint32Array(values []uint32) Uint32Array { return Uint32Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 4)} } func UnsafeUint32Array(base unsafe.Pointer, 
length int, offset uintptr) Uint32Array { return Uint32Array{makeArray(base, uintptr(length), offset)} } func (a Uint32Array) Len() int { return int(a.len) } func (a Uint32Array) Index(i int) uint32 { return *(*uint32)(a.index(i)) } func (a Uint32Array) Slice(i, j int) Uint32Array { return Uint32Array{a.slice(i, j)} } func (a Uint32Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Uint32Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Uint32Array) UnsafeArray() Array { return Array{a.array} } type Uint64Array struct{ array } func MakeUint64Array(values []uint64) Uint64Array { return Uint64Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 8)} } func UnsafeUint64Array(base unsafe.Pointer, length int, offset uintptr) Uint64Array { return Uint64Array{makeArray(base, uintptr(length), offset)} } func (a Uint64Array) Len() int { return int(a.len) } func (a Uint64Array) Index(i int) uint64 { return *(*uint64)(a.index(i)) } func (a Uint64Array) Slice(i, j int) Uint64Array { return Uint64Array{a.slice(i, j)} } func (a Uint64Array) Uint8Array() Uint8Array { return Uint8Array{a.array} } func (a Uint64Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Uint64Array) Uint32Array() Uint32Array { return Uint32Array{a.array} } func (a Uint64Array) UnsafeArray() Array { return Array{a.array} } type Uint128Array struct{ array } func MakeUint128Array(values [][16]byte) Uint128Array { return Uint128Array{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), 16)} } func UnsafeUint128Array(base unsafe.Pointer, length int, offset uintptr) Uint128Array { return Uint128Array{makeArray(base, uintptr(length), offset)} } func (a Uint128Array) Len() int { return int(a.len) } func (a Uint128Array) Index(i int) [16]byte { return *(*[16]byte)(a.index(i)) } func (a Uint128Array) Slice(i, j int) Uint128Array { return Uint128Array{a.slice(i, j)} } func (a Uint128Array) Uint8Array() 
Uint8Array { return Uint8Array{a.array} } func (a Uint128Array) Uint16Array() Uint16Array { return Uint16Array{a.array} } func (a Uint128Array) Uint32Array() Uint32Array { return Uint32Array{a.array} } func (a Uint128Array) Uint64Array() Uint64Array { return Uint64Array{a.array} } func (a Uint128Array) UnsafeArray() Array { return Array{a.array} } type StringArray struct{ array } func MakeStringArray(values []string) StringArray { const sizeOfString = unsafe.Sizeof("") return StringArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfString)} } func UnsafeStringArray(base unsafe.Pointer, length int, offset uintptr) StringArray { return StringArray{makeArray(base, uintptr(length), offset)} } func (a StringArray) Len() int { return int(a.len) } func (a StringArray) Index(i int) string { return *(*string)(a.index(i)) } func (a StringArray) Slice(i, j int) StringArray { return StringArray{a.slice(i, j)} } func (a StringArray) UnsafeArray() Array { return Array{a.array} } type TimeArray struct{ array } func MakeTimeArray(values []time.Time) TimeArray { const sizeOfTime = unsafe.Sizeof(time.Time{}) return TimeArray{makeArray(*(*unsafe.Pointer)(unsafe.Pointer(&values)), uintptr(len(values)), sizeOfTime)} } func UnsafeTimeArray(base unsafe.Pointer, length int, offset uintptr) TimeArray { return TimeArray{makeArray(base, uintptr(length), offset)} } func (a TimeArray) Len() int { return int(a.len) } func (a TimeArray) Index(i int) time.Time { return *(*time.Time)(a.index(i)) } func (a TimeArray) Slice(i, j int) TimeArray { return TimeArray{a.slice(i, j)} } func (a TimeArray) UnsafeArray() Array { return Array{a.array} } ================================================ FILE: sparse/gather.go ================================================ package sparse import "unsafe" func GatherInt32(dst []int32, src Int32Array) int { return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array()) } func GatherInt64(dst []int64, src 
Int64Array) int {
	return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array())
}

func GatherFloat32(dst []float32, src Float32Array) int {
	return GatherUint32(*(*[]uint32)(unsafe.Pointer(&dst)), src.Uint32Array())
}

func GatherFloat64(dst []float64, src Float64Array) int {
	return GatherUint64(*(*[]uint64)(unsafe.Pointer(&dst)), src.Uint64Array())
}

func GatherBits(dst []byte, src Uint8Array) int { return gatherBits(dst, src) }

func GatherUint32(dst []uint32, src Uint32Array) int { return gather32(dst, src) }

func GatherUint64(dst []uint64, src Uint64Array) int { return gather64(dst, src) }

func GatherUint128(dst [][16]byte, src Uint128Array) int { return gather128(dst, src) }

// GatherString copies up to min(len(dst), src.Len()) string headers out of
// the sparse array; there is no assembly fast path for strings.
func GatherString(dst []string, src StringArray) int {
	n := min(len(dst), src.Len())

	for i := range dst[:n] {
		dst[i] = src.Index(i)
	}

	return n
}

func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

================================================ FILE: sparse/gather_amd64.go ================================================
//go:build !purego

package sparse

import (
	"golang.org/x/sys/cpu"
)

// gatherBits packs the low bit of up to len(dst)*8 sparse booleans into the
// bitmap dst, using the AVX2 path when available, then finishes the
// non-multiple-of-8 remainder with scalar code.
func gatherBits(dst []byte, src Uint8Array) int {
	n := min(len(dst)*8, src.Len())
	i := 0

	if n >= 8 {
		i = (n / 8) * 8
		// Make sure `offset` is at least 4 bytes, otherwise VPGATHERDD may read
		// data beyond the end of the program memory and trigger a fault.
		//
		// If the boolean values do not have enough padding we must fallback to
		// the scalar algorithm to be able to load single bytes from memory.
		if src.off >= 4 && cpu.X86.HasAVX2 {
			gatherBitsAVX2(dst, src.Slice(0, i))
		} else {
			gatherBitsDefault(dst, src.Slice(0, i))
		}
	}

	// Scalar tail: set bit (i%8) of byte (i/8) from the low bit of element i.
	for i < n {
		x := i / 8
		y := i % 8
		b := src.Index(i)
		dst[x] = ((b & 1) << y) | (dst[x] & ^(1 << y))
		i++
	}

	return n
}

// gather32 gathers 32-bit values, 8 at a time with AVX2 when profitable.
func gather32(dst []uint32, src Uint32Array) int {
	n := min(len(dst), src.Len())
	i := 0

	if n >= 16 && cpu.X86.HasAVX2 {
		i = (n / 8) * 8
		gather32AVX2(dst[:i:i], src)
	}

	for i < n {
		dst[i] = src.Index(i)
		i++
	}

	return n
}

// gather64 gathers 64-bit values, 4 at a time with AVX2 when profitable.
func gather64(dst []uint64, src Uint64Array) int {
	n := min(len(dst), src.Len())
	i := 0

	if n >= 8 && cpu.X86.HasAVX2 {
		i = (n / 4) * 4
		gather64AVX2(dst[:i:i], src)
	}

	for i < n {
		dst[i] = src.Index(i)
		i++
	}

	return n
}

//go:noescape
func gatherBitsAVX2(dst []byte, src Uint8Array)

//go:noescape
func gatherBitsDefault(dst []byte, src Uint8Array)

//go:noescape
func gather32AVX2(dst []uint32, src Uint32Array)

//go:noescape
func gather64AVX2(dst []uint64, src Uint64Array)

//go:noescape
func gather128(dst [][16]byte, src Uint128Array) int

================================================ FILE: sparse/gather_amd64.s ================================================
//go:build !purego

#include "textflag.h"

// func gatherBitsAVX2(dst []byte, src Uint8Array)
//
// Gathers 8 strided bytes per iteration with VPGATHERDD, extracts their low
// bits via sign-bit extraction (VPSLLD $31 + VMOVMSKPS) and stores one packed
// byte per iteration.
TEXT ·gatherBitsAVX2(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), AX
	MOVQ src_array_ptr+24(FP), BX
	MOVQ src_array_len+32(FP), CX
	MOVQ src_array_off+40(FP), DX
	XORQ SI, SI
	SHRQ $3, CX
	VPBROADCASTD src_array_off+40(FP), Y0
	VPMULLD range0n7<>(SB), Y0, Y0
	VPCMPEQD Y1, Y1, Y1
	VPCMPEQD Y2, Y2, Y2
loop:
	VPGATHERDD Y1, (BX)(Y0*1), Y3
	VMOVDQU Y2, Y1
	VPSLLD $31, Y3, Y3
	VMOVMSKPS Y3, DI
	MOVB DI, (AX)(SI*1)
	LEAQ (BX)(DX*8), BX
	INCQ SI
	CMPQ SI, CX
	JNE loop
	VZEROUPPER
	RET

// func gatherBitsDefault(dst []byte, src Uint8Array)
//
// Scalar fallback: loads 8 strided bytes, masks each to its low bit, shifts
// into position and ORs them together into one output byte per iteration.
TEXT ·gatherBitsDefault(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), AX
	MOVQ src_array_ptr+24(FP), BX
	MOVQ src_array_len+32(FP), CX
	MOVQ src_array_off+40(FP), DX
	XORQ SI, SI
	SHRQ $3, CX
loop:
	LEAQ (BX)(DX*2), DI
	MOVBQZX (BX), R8
	MOVBQZX (BX)(DX*1), R9
	MOVBQZX (DI), R10
	MOVBQZX (DI)(DX*1), R11
	LEAQ (BX)(DX*4), BX
	LEAQ (DI)(DX*4), DI
	MOVBQZX (BX), R12
	MOVBQZX (BX)(DX*1), R13
	MOVBQZX (DI), R14
	MOVBQZX (DI)(DX*1), R15
	LEAQ (BX)(DX*4), BX
	ANDQ $1, R8
	ANDQ $1, R9
	ANDQ $1, R10
	ANDQ $1, R11
	ANDQ $1, R12
	ANDQ $1, R13
	ANDQ $1, R14
	ANDQ $1, R15
	SHLQ $1, R9
	SHLQ $2, R10
	SHLQ $3, R11
	SHLQ $4, R12
	SHLQ $5, R13
	SHLQ $6, R14
	SHLQ $7, R15
	ORQ R9, R8
	ORQ R11, R10
	ORQ R13, R12
	ORQ R15, R14
	ORQ R10, R8
	ORQ R12, R8
	ORQ R14, R8
	MOVB R8, (AX)(SI*1)
	INCQ SI
	CMPQ SI, CX
	JNE loop
	RET

// func gather32AVX2(dst []uint32, src Uint32Array)
//
// Gathers 8 strided 32-bit values per iteration with VPGATHERDD.
TEXT ·gather32AVX2(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), AX
	MOVQ dst_len+8(FP), CX
	MOVQ src_array_ptr+24(FP), BX
	MOVQ src_array_off+40(FP), DX
	XORQ SI, SI
	VPBROADCASTD src_array_off+40(FP), Y0
	VPMULLD range0n7<>(SB), Y0, Y0
	VPCMPEQD Y1, Y1, Y1
	VPCMPEQD Y2, Y2, Y2
loop:
	VPGATHERDD Y1, (BX)(Y0*1), Y3
	VMOVDQU Y3, (AX)(SI*4)
	VMOVDQU Y2, Y1
	LEAQ (BX)(DX*8), BX
	ADDQ $8, SI
	CMPQ SI, CX
	JNE loop
	VZEROUPPER
	RET

// func gather64AVX2(dst []uint64, src Uint64Array)
//
// Gathers 4 strided 64-bit values per iteration with VPGATHERQQ.
TEXT ·gather64AVX2(SB), NOSPLIT, $0-48
	MOVQ dst_base+0(FP), AX
	MOVQ dst_len+8(FP), CX
	MOVQ src_array_ptr+24(FP), BX
	MOVQ src_array_off+40(FP), DX
	XORQ SI, SI
	VPBROADCASTQ src_array_off+40(FP), Y0
	VPMULLD range0n3<>(SB), Y0, Y0
	VPCMPEQQ Y1, Y1, Y1
	VPCMPEQQ Y2, Y2, Y2
loop:
	VPGATHERQQ Y1, (BX)(Y0*1), Y3
	VMOVDQU Y3, (AX)(SI*8)
	VMOVDQU Y2, Y1
	LEAQ (BX)(DX*4), BX
	ADDQ $4, SI
	CMPQ SI, CX
	JNE loop
	VZEROUPPER
	RET

// func gather128(dst [][16]byte, src Uint128Array) int
//
// Copies min(len(dst), src.Len()) 16-byte elements, two per iteration with a
// single-element tail, and returns the count.
// NOTE(review): this routine is truncated at the end of the visible chunk
// (the final RET after `done:` lies outside this excerpt).
TEXT ·gather128(SB), NOSPLIT, $0-56
	MOVQ dst_base+0(FP), AX
	MOVQ dst_len+8(FP), CX
	MOVQ src_array_ptr+24(FP), BX
	MOVQ src_array_len+32(FP), DI
	MOVQ src_array_off+40(FP), DX
	XORQ SI, SI
	CMPQ DI, CX
	CMOVQLT DI, CX
	CMPQ CX, $0
	JE done
	CMPQ CX, $1
	JE tail
	XORQ SI, SI
	MOVQ CX, DI
	SHRQ $1, DI
	SHLQ $1, DI
loop:
	MOVOU (BX), X0
	MOVOU (BX)(DX*1), X1
	MOVOU X0, (AX)
	MOVOU X1, 16(AX)
	LEAQ (BX)(DX*2), BX
	ADDQ $32, AX
	ADDQ $2, SI
	CMPQ SI, DI
	JNE loop
	CMPQ SI, CX
	JE done
tail:
	MOVOU (BX), X0
	MOVOU X0, (AX)
done:
	MOVQ CX, ret+48(FP)
RET GLOBL range0n3<>(SB), RODATA|NOPTR, $32 DATA range0n3<>+0(SB)/8, $0 DATA range0n3<>+8(SB)/8, $1 DATA range0n3<>+16(SB)/8, $2 DATA range0n3<>+24(SB)/8, $3 GLOBL range0n7<>(SB), RODATA|NOPTR, $32 DATA range0n7<>+0(SB)/4, $0 DATA range0n7<>+4(SB)/4, $1 DATA range0n7<>+8(SB)/4, $2 DATA range0n7<>+12(SB)/4, $3 DATA range0n7<>+16(SB)/4, $4 DATA range0n7<>+20(SB)/4, $5 DATA range0n7<>+24(SB)/4, $6 DATA range0n7<>+28(SB)/4, $7 ================================================ FILE: sparse/gather_purego.go ================================================ //go:build purego || !amd64 package sparse func gatherBits(dst []byte, src Uint8Array) int { n := min(len(dst)*8, src.Len()) i := 0 if k := (n / 8) * 8; k > 0 { for j := 0; i < k; j++ { b0 := src.Index(i + 0) b1 := src.Index(i + 1) b2 := src.Index(i + 2) b3 := src.Index(i + 3) b4 := src.Index(i + 4) b5 := src.Index(i + 5) b6 := src.Index(i + 6) b7 := src.Index(i + 7) dst[j] = (b0 & 1) | ((b1 & 1) << 1) | ((b2 & 1) << 2) | ((b3 & 1) << 3) | ((b4 & 1) << 4) | ((b5 & 1) << 5) | ((b6 & 1) << 6) | ((b7 & 1) << 7) i += 8 } } for i < n { x := i / 8 y := i % 8 b := src.Index(i) dst[x] = ((b & 1) << y) | (dst[x] & ^(1 << y)) i++ } return n } func gather32(dst []uint32, src Uint32Array) int { n := min(len(dst), src.Len()) for i := range dst[:n] { dst[i] = src.Index(i) } return n } func gather64(dst []uint64, src Uint64Array) int { n := min(len(dst), src.Len()) for i := range dst[:n] { dst[i] = src.Index(i) } return n } func gather128(dst [][16]byte, src Uint128Array) int { n := min(len(dst), src.Len()) for i := range dst[:n] { dst[i] = src.Index(i) } return n } ================================================ FILE: sparse/gather_test.go ================================================ package sparse_test import ( "encoding/binary" "fmt" "math" "strconv" "testing" "time" "unsafe" "github.com/segmentio/parquet-go/sparse" ) const ( benchmarkGatherPerLoop = 1000 ) func ExampleGatherUint32() { type point2D struct{ X, Y uint32 } buf := 
make([]point2D, 10) dst := make([]uint32, 10) src := sparse.UnsafeUint32Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) for i := range buf { buf[i].X = math.MaxUint32 buf[i].Y = uint32(2 * i) } n := sparse.GatherUint32(dst, src) for i, v := range dst[:n] { fmt.Printf("points[%d].Y = %d\n", i, v) } // Output: // points[0].Y = 0 // points[1].Y = 2 // points[2].Y = 4 // points[3].Y = 6 // points[4].Y = 8 // points[5].Y = 10 // points[6].Y = 12 // points[7].Y = 14 // points[8].Y = 16 // points[9].Y = 18 } func ExampleGatherUint64() { type point2D struct{ X, Y uint64 } buf := make([]point2D, 10) dst := make([]uint64, 10) src := sparse.UnsafeUint64Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) for i := range buf { buf[i].X = math.MaxUint64 buf[i].Y = uint64(2 * i) } n := sparse.GatherUint64(dst, src) for i, v := range dst[:n] { fmt.Printf("points[%d].Y = %v\n", i, v) } // Output: // points[0].Y = 0 // points[1].Y = 2 // points[2].Y = 4 // points[3].Y = 6 // points[4].Y = 8 // points[5].Y = 10 // points[6].Y = 12 // points[7].Y = 14 // points[8].Y = 16 // points[9].Y = 18 } func ExampleGatherUint128() { type point2D struct{ X, Y [16]byte } buf := make([]point2D, 10) dst := make([][16]byte, 10) src := sparse.UnsafeUint128Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) for i := range buf { x := uint64(math.MaxUint64) y := uint64(2 * i) binary.LittleEndian.PutUint64(buf[i].X[:], x) binary.LittleEndian.PutUint64(buf[i].Y[:], y) } n := sparse.GatherUint128(dst, src) for i, v := range dst[:n] { fmt.Printf("points[%d].Y = %v\n", i, binary.LittleEndian.Uint64(v[:])) } // Output: // points[0].Y = 0 // points[1].Y = 2 // points[2].Y = 4 // points[3].Y = 6 // points[4].Y = 8 // points[5].Y = 10 // points[6].Y = 12 // points[7].Y = 14 // points[8].Y = 16 // points[9].Y = 18 } func ExampleGatherString() { buf := make([][2]string, 10) dst := make([]string, 10) src := sparse.UnsafeStringArray(unsafe.Pointer(&buf[0][1]), len(buf), 
unsafe.Sizeof(buf[0])) for i := range buf { buf[i][0] = "-" buf[i][1] = strconv.Itoa(i) } n := sparse.GatherString(dst, src) for i, v := range dst[:n] { fmt.Printf("points[%d].Y = %v\n", i, v) } // Output: // points[0].Y = 0 // points[1].Y = 1 // points[2].Y = 2 // points[3].Y = 3 // points[4].Y = 4 // points[5].Y = 5 // points[6].Y = 6 // points[7].Y = 7 // points[8].Y = 8 // points[9].Y = 9 } func TestGatherUint32(t *testing.T) { type point2D struct{ X, Y uint32 } const N = 100 buf := make([]point2D, N+1) dst := make([]uint32, N) src := sparse.UnsafeUint32Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) for i := range buf { buf[i].X = math.MaxUint32 buf[i].Y = uint32(2 * i) } if n := sparse.GatherUint32(dst, src); n != N { t.Errorf("wrong number of values gathered: want=%d got=%d", N, n) } for i, v := range dst { if v != uint32(2*i) { t.Errorf("wrong value gathered at index %d: want=%d got=%d", i, 2*i, v) } } } func TestGatherUint64(t *testing.T) { type point2D struct{ X, Y uint64 } const N = 100 buf := make([]point2D, N+1) dst := make([]uint64, N) src := sparse.UnsafeUint64Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) for i := range buf { buf[i].X = math.MaxUint64 buf[i].Y = uint64(2 * i) } if n := sparse.GatherUint64(dst, src); n != N { t.Errorf("wrong number of values gathered: want=%d got=%d", N, n) } for i, v := range dst { if v != uint64(2*i) { t.Errorf("wrong value gathered at index %d: want=%d got=%d", i, 2*i, v) } } } func TestGatherUint128(t *testing.T) { type point2D struct{ X, Y [16]byte } const N = 100 buf := make([]point2D, N+1) dst := make([][16]byte, N) src := sparse.UnsafeUint128Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) for i := range buf { x := uint64(math.MaxUint64) y := uint64(2 * i) binary.LittleEndian.PutUint64(buf[i].X[:], x) binary.LittleEndian.PutUint64(buf[i].Y[:], y) } if n := sparse.GatherUint128(dst, src); n != N { t.Errorf("wrong number of values gathered: want=%d got=%d", 
N, n) } for i, v := range dst { if y := binary.LittleEndian.Uint64(v[:]); y != uint64(2*i) { t.Errorf("wrong value gathered at index %d: want=%d got=%d", i, 2*i, y) } } } func BenchmarkGather32(b *testing.B) { type point2D struct{ X, Y uint32 } buf := make([]point2D, benchmarkGatherPerLoop) dst := make([]uint32, benchmarkGatherPerLoop) src := sparse.UnsafeUint32Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) b.SetBytes(4 * benchmarkGatherPerLoop) benchmarkThroughput(b, "gather", func() int { return sparse.GatherUint32(dst, src) }) } func BenchmarkGather64(b *testing.B) { type point2D struct{ X, Y uint64 } buf := make([]point2D, benchmarkGatherPerLoop) dst := make([]uint64, benchmarkGatherPerLoop) src := sparse.UnsafeUint64Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) b.SetBytes(8 * benchmarkGatherPerLoop) benchmarkThroughput(b, "gather", func() int { return sparse.GatherUint64(dst, src) }) } func BenchmarkGather128(b *testing.B) { type point2D struct{ X, Y [16]byte } buf := make([]point2D, benchmarkGatherPerLoop) dst := make([][16]byte, benchmarkGatherPerLoop) src := sparse.UnsafeUint128Array(unsafe.Pointer(&buf[0].Y), len(buf), unsafe.Sizeof(buf[0])) b.SetBytes(16 * benchmarkGatherPerLoop) benchmarkThroughput(b, "gather", func() int { return sparse.GatherUint128(dst, src) }) } func benchmarkThroughput(b *testing.B, m string, f func() int) { start := time.Now() count := 0 for i := 0; i < b.N; i++ { count += f() } seconds := time.Since(start).Seconds() b.ReportMetric(float64(count)/seconds, m+"/s") } ================================================ FILE: sparse/sparse.go ================================================ // Package sparse contains abstractions to help work on arrays of values in // sparse memory locations. // // Conversion between array types is supported when converting integers to a // lower size (e.g. int32 to int16, or uint64 to uint8), or converting from // signed integers to unsigned. 
Float types can also be converted to unsigned // integers of the same size, in which case the conversion is similar to using // the standard library's math.Float32bits and math.Float64bits functions. // // All array types can be converted to a generic Array type that can be used to erase // type information and bypass type conversion rules. This conversion is similar // to using Go's unsafe package to bypass Go's type system and should usually be // avoided and a sign that the application is attempting to break type safety // boundaries. // // The package provides Gather* functions which retrieve values from sparse // arrays into contiguous memory buffers. On platforms that support it, these // operations are implemented using SIMD gather instructions (e.g. VPGATHER on // Intel CPUs). package sparse ================================================ FILE: transform.go ================================================ package parquet // TransformRowReader constructs a RowReader which applies the given transform // to each row rad from reader. // // The transformation function appends the transformed src row to dst, returning // dst and any error that occurred during the transformation. If dst is returned // unchanged, the row is skipped. 
func TransformRowReader(reader RowReader, transform func(dst, src Row) (Row, error)) RowReader { return &transformRowReader{reader: reader, transform: transform} } type transformRowReader struct { reader RowReader transform func(Row, Row) (Row, error) rows []Row offset int length int } func (t *transformRowReader) ReadRows(rows []Row) (n int, err error) { if len(t.rows) == 0 { t.rows = makeRows(len(rows)) } for { for n < len(rows) && t.offset < t.length { dst := rows[n][:0] src := t.rows[t.offset] rows[n], err = t.transform(dst, src) if err != nil { return n, err } clearValues(src) t.rows[t.offset] = src[:0] t.offset++ n++ } if n == len(rows) { return n, nil } r, err := t.reader.ReadRows(t.rows) if r == 0 && err != nil { return n, err } t.offset = 0 t.length = r } } type transformRowBuffer struct { buffer []Row offset int32 length int32 } func (b *transformRowBuffer) init(n int) { b.buffer = makeRows(n) b.offset = 0 b.length = 0 } func (b *transformRowBuffer) discard() { row := b.buffer[b.offset] clearValues(row) b.buffer[b.offset] = row[:0] if b.offset++; b.offset == b.length { b.reset(0) } } func (b *transformRowBuffer) reset(n int) { b.offset = 0 b.length = int32(n) } func (b *transformRowBuffer) rows() []Row { return b.buffer[b.offset:b.length] } func (b *transformRowBuffer) cap() int { return len(b.buffer) } func (b *transformRowBuffer) len() int { return int(b.length - b.offset) } // TransformRowWriter constructs a RowWriter which applies the given transform // to each row writter to writer. // // The transformation function appends the transformed src row to dst, returning // dst and any error that occurred during the transformation. If dst is returned // unchanged, the row is skipped. 
func TransformRowWriter(writer RowWriter, transform func(dst, src Row) (Row, error)) RowWriter {
	return &transformRowWriter{writer: writer, transform: transform}
}

// transformRowWriter applies a transformation function to rows before
// forwarding them to the underlying writer.
type transformRowWriter struct {
	writer    RowWriter
	transform func(Row, Row) (Row, error)
	rows      []Row // scratch buffer of transformed rows, reused across calls
}

// WriteRows transforms the given rows and writes the results to the underlying
// writer, processing the input in batches no larger than the scratch buffer.
func (t *transformRowWriter) WriteRows(rows []Row) (n int, err error) {
	if len(t.rows) == 0 {
		t.rows = makeRows(len(rows))
	}
	for n < len(rows) {
		numRows := len(rows) - n
		if numRows > len(t.rows) {
			numRows = len(t.rows)
		}
		if err := t.writeRows(rows[n : n+numRows]); err != nil {
			return n, err
		}
		n += numRows
	}
	return n, nil
}

// writeRows transforms a batch of at most len(t.rows) rows and forwards the
// results to the underlying writer. Rows for which the transform returns dst
// unchanged (empty) are skipped.
func (t *transformRowWriter) writeRows(rows []Row) (err error) {
	numRows := 0
	// Release the values of the transformed rows back to their pool when the
	// batch has been written or an error interrupted the transformation.
	defer func() { clearRows(t.rows[:numRows]) }()

	for _, row := range rows {
		t.rows[numRows], err = t.transform(t.rows[numRows][:0], row)
		if err != nil {
			return err
		}
		// Only count rows that produced values; empty results are skipped.
		if len(t.rows[numRows]) != 0 {
			numRows++
		}
	}

	_, err = t.writer.WriteRows(t.rows[:numRows])
	return err
}

================================================
FILE: transform_test.go
================================================
package parquet_test

import (
	"testing"

	"github.com/segmentio/parquet-go"
)

// TestTransformRowReader verifies that a transforming reader applies the
// transformation to every row read from the source.
func TestTransformRowReader(t *testing.T) {
	rows := []parquet.Row{
		{parquet.Int64Value(0)},
		{parquet.Int64Value(1)},
		{parquet.Int64Value(2)},
		{parquet.Int64Value(3)},
		{parquet.Int64Value(4)},
	}

	// Each transformed row carries the original value plus a doubled value
	// appended as a second column.
	want := []parquet.Row{
		{parquet.Int64Value(0), parquet.Int64Value(0).Level(0, 0, 1)},
		{parquet.Int64Value(1), parquet.Int64Value(2).Level(0, 0, 1)},
		{parquet.Int64Value(2), parquet.Int64Value(4).Level(0, 0, 1)},
		{parquet.Int64Value(3), parquet.Int64Value(6).Level(0, 0, 1)},
		{parquet.Int64Value(4), parquet.Int64Value(8).Level(0, 0, 1)},
	}

	reader := parquet.TransformRowReader(&bufferedRows{rows: rows},
		func(dst, src parquet.Row) (parquet.Row, error) {
			dst = append(dst, src[0])
			dst = append(dst, parquet.Int64Value(2*src[0].Int64()).Level(0, 0, 1))
			return dst, nil
		},
	)

	writer := &bufferedRows{}
	_, err := parquet.CopyRows(writer, reader)
	if err != nil {
		t.Fatal(err)
	}
	assertEqualRows(t, want, writer.rows)
}

// TestTransformRowWriter verifies that a transforming writer drops rows for
// which the transformation returns dst unchanged (here: even values).
func TestTransformRowWriter(t *testing.T) {
	rows := []parquet.Row{
		{parquet.Int64Value(0)},
		{parquet.Int64Value(1)},
		{parquet.Int64Value(2)},
		{parquet.Int64Value(3)},
		{parquet.Int64Value(4)},
	}

	want := []parquet.Row{
		{parquet.Int64Value(1)},
		{parquet.Int64Value(3)},
	}

	buffer := &bufferedRows{}
	writer := parquet.TransformRowWriter(buffer,
		func(dst, src parquet.Row) (parquet.Row, error) {
			if (src[0].Int64() % 2) != 0 {
				dst = append(dst, src[0])
			}
			return dst, nil
		},
	)

	reader := &bufferedRows{rows: rows}
	_, err := parquet.CopyRows(writer, reader)
	if err != nil {
		t.Fatal(err)
	}
	assertEqualRows(t, want, buffer.rows)
}

================================================
FILE: type.go
================================================
package parquet

import (
	"bytes"
	"encoding/json"
	"fmt"
	"math/bits"
	"reflect"
	"time"
	"unsafe"

	"github.com/segmentio/parquet-go/deprecated"
	"github.com/segmentio/parquet-go/encoding"
	"github.com/segmentio/parquet-go/format"
	"github.com/segmentio/parquet-go/internal/unsafecast"
)

// Kind is an enumeration type representing the physical types supported by the
// parquet type system.
type Kind int8

const (
	Boolean           Kind = Kind(format.Boolean)
	Int32             Kind = Kind(format.Int32)
	Int64             Kind = Kind(format.Int64)
	Int96             Kind = Kind(format.Int96)
	Float             Kind = Kind(format.Float)
	Double            Kind = Kind(format.Double)
	ByteArray         Kind = Kind(format.ByteArray)
	FixedLenByteArray Kind = Kind(format.FixedLenByteArray)
)

// String returns a human-readable representation of the physical type.
func (k Kind) String() string { return format.Type(k).String() }

// Value constructs a value from k and v.
//
// The method panics if the data is not a valid representation of the value
// kind; for example, if the kind is Int32 but the data is not 4 bytes long.
func (k Kind) Value(v []byte) Value {
	x, err := parseValue(k, v)
	if err != nil {
		panic(err)
	}
	return x
}

// The Type interface represents logical types of the parquet type system.
// // Types are immutable and therefore safe to access from multiple goroutines. type Type interface { // Returns a human-readable representation of the parquet type. String() string // Returns the Kind value representing the underlying physical type. // // The method panics if it is called on a group type. Kind() Kind // For integer and floating point physical types, the method returns the // size of values in bits. // // For fixed-length byte arrays, the method returns the size of elements // in bytes. // // For other types, the value is zero. Length() int // Returns an estimation of the number of bytes required to hold the given // number of values of this type in memory. // // The method returns zero for group types. EstimateSize(numValues int) int // Returns an estimation of the number of values of this type that can be // held in the given byte size. // // The method returns zero for group types. EstimateNumValues(size int) int // Compares two values and returns a negative integer if a < b, positive if // a > b, or zero if a == b. // // The values' Kind must match the type, otherwise the result is undefined. // // The method panics if it is called on a group type. Compare(a, b Value) int // ColumnOrder returns the type's column order. For group types, this method // returns nil. // // The order describes the comparison logic implemented by the Less method. // // As an optimization, the method may return the same pointer across // multiple calls. Applications must treat the returned value as immutable, // mutating the value will result in undefined behavior. ColumnOrder() *format.ColumnOrder // Returns the physical type as a *format.Type value. For group types, this // method returns nil. // // As an optimization, the method may return the same pointer across // multiple calls. Applications must treat the returned value as immutable, // mutating the value will result in undefined behavior. 
PhysicalType() *format.Type // Returns the logical type as a *format.LogicalType value. When the logical // type is unknown, the method returns nil. // // As an optimization, the method may return the same pointer across // multiple calls. Applications must treat the returned value as immutable, // mutating the value will result in undefined behavior. LogicalType() *format.LogicalType // Returns the logical type's equivalent converted type. When there are // no equivalent converted type, the method returns nil. // // As an optimization, the method may return the same pointer across // multiple calls. Applications must treat the returned value as immutable, // mutating the value will result in undefined behavior. ConvertedType() *deprecated.ConvertedType // Creates a column indexer for values of this type. // // The size limit is a hint to the column indexer that it is allowed to // truncate the page boundaries to the given size. Only BYTE_ARRAY and // FIXED_LEN_BYTE_ARRAY types currently take this value into account. // // A value of zero or less means no limits. // // The method panics if it is called on a group type. NewColumnIndexer(sizeLimit int) ColumnIndexer // Creates a row group buffer column for values of this type. // // Column buffers are created using the index of the column they are // accumulating values in memory for (relative to the parent schema), // and the size of their memory buffer. // // The application may give an estimate of the number of values it expects // to write to the buffer as second argument. This estimate helps set the // initialize buffer capacity but is not a hard limit, the underlying memory // buffer will grown as needed to allow more values to be written. Programs // may use the Size method of the column buffer (or the parent row group, // when relevant) to determine how many bytes are being used, and perform a // flush of the buffers to a storage layer. // // The method panics if it is called on a group type. 
NewColumnBuffer(columnIndex, numValues int) ColumnBuffer // Creates a dictionary holding values of this type. // // The dictionary retains the data buffer, it does not make a copy of it. // If the application needs to share ownership of the memory buffer, it must // ensure that it will not be modified while the page is in use, or it must // make a copy of it prior to creating the dictionary. // // The method panics if the data type does not correspond to the parquet // type it is called on. NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary // Creates a page belonging to a column at the given index, backed by the // data buffer. // // The page retains the data buffer, it does not make a copy of it. If the // application needs to share ownership of the memory buffer, it must ensure // that it will not be modified while the page is in use, or it must make a // copy of it prior to creating the page. // // The method panics if the data type does not correspond to the parquet // type it is called on. NewPage(columnIndex, numValues int, data encoding.Values) Page // Creates an encoding.Values instance backed by the given buffers. // // The offsets is only used by BYTE_ARRAY types, where it represents the // positions of each variable length value in the values buffer. // // The following expression creates an empty instance for any type: // // values := typ.NewValues(nil, nil) // // The method panics if it is called on group types. NewValues(values []byte, offsets []uint32) encoding.Values // Assuming the src buffer contains PLAIN encoded values of the type it is // called on, applies the given encoding and produces the output to the dst // buffer passed as first argument by dispatching the call to one of the // encoding methods. 
Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) // Assuming the src buffer contains values encoding in the given encoding, // decodes the input and produces the encoded values into the dst output // buffer passed as first argument by dispatching the call to one of the // encoding methods. Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) // Returns an estimation of the output size after decoding the values passed // as first argument with the given encoding. // // For most types, this is similar to calling EstimateSize with the known // number of encoded values. For variable size types, using this method may // provide a more precise result since it can inspect the input buffer. EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int // Assigns a Parquet value to a Go value. Returns an error if assignment is // not possible. The source Value must be an expected logical type for the // receiver. This can be accomplished using ConvertValue. AssignValue(dst reflect.Value, src Value) error // Convert a Parquet Value of the given Type into a Parquet Value that is // compatible with the receiver. The returned Value is suitable to be passed // to AssignValue. ConvertValue(val Value, typ Type) (Value, error) } var ( BooleanType Type = booleanType{} Int32Type Type = int32Type{} Int64Type Type = int64Type{} Int96Type Type = int96Type{} FloatType Type = floatType{} DoubleType Type = doubleType{} ByteArrayType Type = byteArrayType{} ) // In the current parquet version supported by this library, only type-defined // orders are supported. 
var typeDefinedColumnOrder = format.ColumnOrder{
	TypeOrder: new(format.TypeDefinedOrder),
}

// physicalTypes maps Kind values (used as array indices, e.g.
// physicalTypes[Boolean]) to their parquet format representation.
var physicalTypes = [...]format.Type{
	0: format.Boolean,
	1: format.Int32,
	2: format.Int64,
	3: format.Int96,
	4: format.Float,
	5: format.Double,
	6: format.ByteArray,
	7: format.FixedLenByteArray,
}

// convertedTypes is indexed by deprecated.ConvertedType values (e.g.
// convertedTypes[deprecated.Decimal]) so methods can return stable pointers
// into this table instead of allocating.
var convertedTypes = [...]deprecated.ConvertedType{
	0:  deprecated.UTF8,
	1:  deprecated.Map,
	2:  deprecated.MapKeyValue,
	3:  deprecated.List,
	4:  deprecated.Enum,
	5:  deprecated.Decimal,
	6:  deprecated.Date,
	7:  deprecated.TimeMillis,
	8:  deprecated.TimeMicros,
	9:  deprecated.TimestampMillis,
	10: deprecated.TimestampMicros,
	11: deprecated.Uint8,
	12: deprecated.Uint16,
	13: deprecated.Uint32,
	14: deprecated.Uint64,
	15: deprecated.Int8,
	16: deprecated.Int16,
	17: deprecated.Int32,
	18: deprecated.Int64,
	19: deprecated.Json,
	20: deprecated.Bson,
	21: deprecated.Interval,
}

// booleanType is the Type implementation for the parquet BOOLEAN physical type.
type booleanType struct{}

func (t booleanType) String() string { return "BOOLEAN" }

func (t booleanType) Kind() Kind { return Boolean }

func (t booleanType) Length() int { return 1 }

// EstimateSize returns the encoded size of n booleans: one bit per value,
// rounded up to whole bytes.
func (t booleanType) EstimateSize(n int) int { return (n + 7) / 8 }

func (t booleanType) EstimateNumValues(n int) int { return 8 * n }

func (t booleanType) Compare(a, b Value) int { return compareBool(a.boolean(), b.boolean()) }

func (t booleanType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t booleanType) LogicalType() *format.LogicalType { return nil }

func (t booleanType) ConvertedType() *deprecated.ConvertedType { return nil }

func (t booleanType) PhysicalType() *format.Type { return &physicalTypes[Boolean] }

func (t booleanType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newBooleanColumnIndexer()
}

func (t booleanType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newBooleanColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t booleanType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newBooleanDictionary(t,
		makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t booleanType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newBooleanPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t booleanType) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.BooleanValues(values)
}

func (t booleanType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeBoolean(dst, src, enc)
}

func (t booleanType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeBoolean(dst, src, enc)
}

func (t booleanType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

func (t booleanType) AssignValue(dst reflect.Value, src Value) error {
	v := src.boolean()
	switch dst.Kind() {
	case reflect.Bool:
		dst.SetBool(v)
	default:
		dst.Set(reflect.ValueOf(v))
	}
	return nil
}

func (t booleanType) ConvertValue(val Value, typ Type) (Value, error) {
	// *stringType is matched by concrete type because its Kind() is ByteArray
	// and would otherwise be indistinguishable in the Kind switch below.
	switch typ.(type) {
	case *stringType:
		return convertStringToBoolean(val)
	}
	switch typ.Kind() {
	case Boolean:
		return val, nil
	case Int32:
		return convertInt32ToBoolean(val)
	case Int64:
		return convertInt64ToBoolean(val)
	case Int96:
		return convertInt96ToBoolean(val)
	case Float:
		return convertFloatToBoolean(val)
	case Double:
		return convertDoubleToBoolean(val)
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToBoolean(val)
	default:
		return makeValueKind(Boolean), nil
	}
}

// int32Type is the Type implementation for the parquet INT32 physical type.
type int32Type struct{}

func (t int32Type) String() string { return "INT32" }

func (t int32Type) Kind() Kind { return Int32 }

func (t int32Type) Length() int { return 32 }

func (t int32Type) EstimateSize(n int) int { return 4 * n }

func (t int32Type) EstimateNumValues(n int) int { return n / 4 }

func (t int32Type) Compare(a, b Value) int { return compareInt32(a.int32(), b.int32()) }

func (t int32Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t int32Type) LogicalType() *format.LogicalType { return nil }

func (t int32Type) ConvertedType() *deprecated.ConvertedType { return nil }

func (t int32Type) PhysicalType() *format.Type { return &physicalTypes[Int32] }

func (t int32Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newInt32ColumnIndexer()
}

func (t int32Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newInt32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t int32Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newInt32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t int32Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newInt32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t int32Type) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.Int32ValuesFromBytes(values)
}

func (t int32Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeInt32(dst, src, enc)
}

func (t int32Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeInt32(dst, src, enc)
}

func (t int32Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

func (t int32Type) AssignValue(dst reflect.Value, src Value) error {
	v := src.int32()
	switch dst.Kind() {
	case reflect.Int8, reflect.Int16, reflect.Int32:
		dst.SetInt(int64(v))
	case reflect.Uint8, reflect.Uint16, reflect.Uint32:
		dst.SetUint(uint64(v))
	default:
		dst.Set(reflect.ValueOf(v))
	}
	return nil
}

func (t int32Type) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.(type) {
	case *stringType:
		return convertStringToInt32(val)
	}
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToInt32(val)
	case Int32:
		return val, nil
	case Int64:
		return convertInt64ToInt32(val)
	case Int96:
		return convertInt96ToInt32(val)
	case Float:
		return convertFloatToInt32(val)
	case Double:
		return convertDoubleToInt32(val)
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToInt32(val)
	default:
		return makeValueKind(Int32), nil
	}
}

// int64Type is the Type implementation for the parquet INT64 physical type.
type int64Type struct{}

func (t int64Type) String() string { return "INT64" }

func (t int64Type) Kind() Kind { return Int64 }

func (t int64Type) Length() int { return 64 }

func (t int64Type) EstimateSize(n int) int { return 8 * n }

func (t int64Type) EstimateNumValues(n int) int { return n / 8 }

func (t int64Type) Compare(a, b Value) int { return compareInt64(a.int64(), b.int64()) }

func (t int64Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t int64Type) LogicalType() *format.LogicalType { return nil }

func (t int64Type) ConvertedType() *deprecated.ConvertedType { return nil }

func (t int64Type) PhysicalType() *format.Type { return &physicalTypes[Int64] }

func (t int64Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newInt64ColumnIndexer()
}

func (t int64Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newInt64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t int64Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newInt64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t int64Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newInt64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t int64Type) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.Int64ValuesFromBytes(values)
}

func (t int64Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeInt64(dst, src, enc)
}

func (t int64Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeInt64(dst, src, enc)
}

func (t int64Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

func (t int64Type) AssignValue(dst reflect.Value, src Value) error {
	v := src.int64()
	switch dst.Kind() {
	case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
		dst.SetInt(v)
	case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
		dst.SetUint(uint64(v))
	default:
		dst.Set(reflect.ValueOf(v))
	}
	return nil
}

func (t int64Type) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.(type) {
	case *stringType:
		return convertStringToInt64(val)
	}
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToInt64(val)
	case Int32:
		return convertInt32ToInt64(val)
	case Int64:
		return val, nil
	case Int96:
		return convertInt96ToInt64(val)
	case Float:
		return convertFloatToInt64(val)
	case Double:
		return convertDoubleToInt64(val)
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToInt64(val)
	default:
		return makeValueKind(Int64), nil
	}
}

// int96Type is the Type implementation for the parquet INT96 physical type.
type int96Type struct{}

func (t int96Type) String() string { return "INT96" }

func (t int96Type) Kind() Kind { return Int96 }

func (t int96Type) Length() int { return 96 }

func (t int96Type) EstimateSize(n int) int { return 12 * n }

func (t int96Type) EstimateNumValues(n int) int { return n / 12 }

func (t int96Type) Compare(a, b Value) int { return compareInt96(a.int96(), b.int96()) }

func (t int96Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t int96Type) LogicalType() *format.LogicalType { return nil }

func (t int96Type) ConvertedType() *deprecated.ConvertedType { return nil }

func (t int96Type) PhysicalType() *format.Type { return &physicalTypes[Int96] }

func (t int96Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newInt96ColumnIndexer()
}

func (t int96Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newInt96ColumnBuffer(t,
		makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t int96Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newInt96Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t int96Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newInt96Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t int96Type) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.Int96ValuesFromBytes(values)
}

func (t int96Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeInt96(dst, src, enc)
}

func (t int96Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeInt96(dst, src, enc)
}

func (t int96Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

// AssignValue always assigns via reflect.Set since Go has no native 96-bit
// integer kind to switch on.
func (t int96Type) AssignValue(dst reflect.Value, src Value) error {
	v := src.Int96()
	dst.Set(reflect.ValueOf(v))
	return nil
}

func (t int96Type) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.(type) {
	case *stringType:
		return convertStringToInt96(val)
	}
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToInt96(val)
	case Int32:
		return convertInt32ToInt96(val)
	case Int64:
		return convertInt64ToInt96(val)
	case Int96:
		return val, nil
	case Float:
		return convertFloatToInt96(val)
	case Double:
		return convertDoubleToInt96(val)
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToInt96(val)
	default:
		return makeValueKind(Int96), nil
	}
}

// floatType is the Type implementation for the parquet FLOAT physical type.
type floatType struct{}

func (t floatType) String() string { return "FLOAT" }

func (t floatType) Kind() Kind { return Float }

func (t floatType) Length() int { return 32 }

func (t floatType) EstimateSize(n int) int { return 4 * n }

func (t floatType) EstimateNumValues(n int) int { return n / 4 }

func (t floatType) Compare(a, b Value) int { return compareFloat32(a.float(), b.float()) }

func (t floatType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t floatType) LogicalType() *format.LogicalType { return nil }

func (t floatType) ConvertedType() *deprecated.ConvertedType { return nil }

func (t floatType) PhysicalType() *format.Type { return &physicalTypes[Float] }

func (t floatType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newFloatColumnIndexer()
}

func (t floatType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newFloatColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t floatType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newFloatDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t floatType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newFloatPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t floatType) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.FloatValuesFromBytes(values)
}

func (t floatType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeFloat(dst, src, enc)
}

func (t floatType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeFloat(dst, src, enc)
}

func (t floatType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

func (t floatType) AssignValue(dst reflect.Value, src Value) error {
	v := src.float()
	switch dst.Kind() {
	case reflect.Float32, reflect.Float64:
		dst.SetFloat(float64(v))
	default:
		dst.Set(reflect.ValueOf(v))
	}
	return nil
}

func (t floatType) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.(type) {
	case *stringType:
		return convertStringToFloat(val)
	}
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToFloat(val)
	case Int32:
		return convertInt32ToFloat(val)
	case Int64:
		return convertInt64ToFloat(val)
	case Int96:
		return convertInt96ToFloat(val)
	case Float:
		return val, nil
	case Double:
		return convertDoubleToFloat(val)
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToFloat(val)
	default:
		return makeValueKind(Float), nil
	}
}

// doubleType is the Type implementation for the parquet DOUBLE physical type.
type doubleType struct{}

func (t doubleType) String() string { return "DOUBLE" }

func (t doubleType) Kind() Kind { return Double }

func (t doubleType) Length() int { return 64 }

func (t doubleType) EstimateSize(n int) int { return 8 * n }

func (t doubleType) EstimateNumValues(n int) int { return n / 8 }

func (t doubleType) Compare(a, b Value) int { return compareFloat64(a.double(), b.double()) }

func (t doubleType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t doubleType) LogicalType() *format.LogicalType { return nil }

func (t doubleType) ConvertedType() *deprecated.ConvertedType { return nil }

func (t doubleType) PhysicalType() *format.Type { return &physicalTypes[Double] }

func (t doubleType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newDoubleColumnIndexer()
}

func (t doubleType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newDoubleColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t doubleType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newDoubleDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t doubleType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newDoublePage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t doubleType) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.DoubleValuesFromBytes(values)
}

func (t doubleType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeDouble(dst, src, enc)
}

func (t doubleType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeDouble(dst, src, enc)
}

func (t doubleType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

func (t doubleType) AssignValue(dst reflect.Value, src Value) error {
	v := src.double()
	switch dst.Kind() {
	case reflect.Float32, reflect.Float64:
		dst.SetFloat(v)
	default:
		dst.Set(reflect.ValueOf(v))
	}
	return nil
}

func (t doubleType) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.(type) {
	case *stringType:
		return convertStringToDouble(val)
	}
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToDouble(val)
	case Int32:
		return convertInt32ToDouble(val)
	case Int64:
		return convertInt64ToDouble(val)
	case Int96:
		return convertInt96ToDouble(val)
	case Float:
		return convertFloatToDouble(val)
	case Double:
		return val, nil
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToDouble(val)
	default:
		return makeValueKind(Double), nil
	}
}

// byteArrayType is the Type implementation for the parquet BYTE_ARRAY
// (variable length) physical type.
type byteArrayType struct{}

func (t byteArrayType) String() string { return "BYTE_ARRAY" }

func (t byteArrayType) Kind() Kind { return ByteArray }

// Length is zero because BYTE_ARRAY values have no fixed length.
func (t byteArrayType) Length() int { return 0 }

func (t byteArrayType) EstimateSize(n int) int { return estimatedSizeOfByteArrayValues * n }

func (t byteArrayType) EstimateNumValues(n int) int { return n / estimatedSizeOfByteArrayValues }

func (t byteArrayType) Compare(a, b Value) int { return bytes.Compare(a.byteArray(), b.byteArray()) }

func (t byteArrayType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t byteArrayType) LogicalType() *format.LogicalType { return nil }

func (t byteArrayType) ConvertedType() *deprecated.ConvertedType { return nil }

func (t byteArrayType) PhysicalType() *format.Type { return &physicalTypes[ByteArray] }

func (t byteArrayType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newByteArrayColumnIndexer(sizeLimit)
}

func (t byteArrayType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t byteArrayType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t byteArrayType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t byteArrayType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return encoding.ByteArrayValues(values, offsets)
}

func (t byteArrayType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeByteArray(dst, src, enc)
}

func (t byteArrayType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeByteArray(dst, src, enc)
}

// EstimateDecodeSize delegates to the encoding, which can inspect the input
// buffer to produce a more precise estimate than a fixed per-value size.
func (t byteArrayType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return enc.EstimateDecodeByteArraySize(src)
}

func (t byteArrayType) AssignValue(dst reflect.Value, src Value) error {
	v := src.byteArray()
	switch dst.Kind() {
	case reflect.String:
		dst.SetString(string(v))
	case reflect.Slice:
		dst.SetBytes(copyBytes(v))
	default:
		val := reflect.ValueOf(string(v))
		dst.Set(val)
	}
	return nil
}

func (t byteArrayType) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToByteArray(val)
	case Int32:
		return convertInt32ToByteArray(val)
	case Int64:
		return convertInt64ToByteArray(val)
	case Int96:
		return convertInt96ToByteArray(val)
	case Float:
		return convertFloatToByteArray(val)
	case Double:
		return convertDoubleToByteArray(val)
	case ByteArray, FixedLenByteArray:
		return val, nil
	default:
		return makeValueKind(ByteArray), nil
	}
}

// fixedLenByteArrayType is the Type implementation for the parquet
// FIXED_LEN_BYTE_ARRAY physical type; length is the size of each value in
// bytes.
type fixedLenByteArrayType struct{ length int }

func (t fixedLenByteArrayType) String() string {
	return fmt.Sprintf("FIXED_LEN_BYTE_ARRAY(%d)", t.length)
}

func (t fixedLenByteArrayType) Kind() Kind { return FixedLenByteArray }

func (t fixedLenByteArrayType) Length() int { return t.length }

func (t fixedLenByteArrayType) EstimateSize(n int) int { return t.length * n }

func (t fixedLenByteArrayType) EstimateNumValues(n int) int { return n / t.length }

func (t fixedLenByteArrayType) Compare(a, b Value) int {
	return bytes.Compare(a.byteArray(), b.byteArray())
}

func (t fixedLenByteArrayType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t fixedLenByteArrayType) LogicalType() *format.LogicalType { return nil }

func (t fixedLenByteArrayType) ConvertedType() *deprecated.ConvertedType { return nil }

func (t fixedLenByteArrayType) PhysicalType() *format.Type {
	return &physicalTypes[FixedLenByteArray]
}

func (t fixedLenByteArrayType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newFixedLenByteArrayColumnIndexer(t.length, sizeLimit)
}

func (t fixedLenByteArrayType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newFixedLenByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t fixedLenByteArrayType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newFixedLenByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t fixedLenByteArrayType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newFixedLenByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t fixedLenByteArrayType) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.FixedLenByteArrayValues(values, t.length)
}

func (t fixedLenByteArrayType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeFixedLenByteArray(dst, src, enc)
}

func (t fixedLenByteArrayType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeFixedLenByteArray(dst,
		src, enc)
}

func (t fixedLenByteArrayType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

func (t fixedLenByteArrayType) AssignValue(dst reflect.Value, src Value) error {
	v := src.byteArray()
	switch dst.Kind() {
	case reflect.Array:
		if dst.Type().Elem().Kind() == reflect.Uint8 && dst.Len() == len(v) {
			// This code could be implemented as a call to reflect.Copy but
			// it would require creating a reflect.Value from v which causes
			// the heap allocation to pack the []byte value. To avoid this
			// overhead we instead convert the reflect.Value holding the
			// destination array into a byte slice which allows us to use
			// a more efficient call to copy.
			d := unsafe.Slice((*byte)(unsafecast.PointerOfValue(dst)), len(v))
			copy(d, v)
			return nil
		}
	case reflect.Slice:
		dst.SetBytes(copyBytes(v))
		return nil
	}
	val := reflect.ValueOf(copyBytes(v))
	dst.Set(val)
	return nil
}

func (t fixedLenByteArrayType) ConvertValue(val Value, typ Type) (Value, error) {
	switch typ.(type) {
	case *stringType:
		return convertStringToFixedLenByteArray(val, t.length)
	}
	switch typ.Kind() {
	case Boolean:
		return convertBooleanToFixedLenByteArray(val, t.length)
	case Int32:
		return convertInt32ToFixedLenByteArray(val, t.length)
	case Int64:
		return convertInt64ToFixedLenByteArray(val, t.length)
	case Int96:
		return convertInt96ToFixedLenByteArray(val, t.length)
	case Float:
		return convertFloatToFixedLenByteArray(val, t.length)
	case Double:
		return convertDoubleToFixedLenByteArray(val, t.length)
	case ByteArray, FixedLenByteArray:
		return convertByteArrayToFixedLenByteArray(val, t.length)
	default:
		return makeValueBytes(FixedLenByteArray, make([]byte, t.length)), nil
	}
}

// uint32Type embeds int32Type and overrides only the methods whose behavior
// differs for unsigned values (ordering, indexing, and value containers).
type uint32Type struct{ int32Type }

func (t uint32Type) Compare(a, b Value) int {
	return compareUint32(a.uint32(), b.uint32())
}

func (t uint32Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newUint32ColumnIndexer()
}

func (t uint32Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newUint32ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t uint32Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newUint32Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t uint32Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newUint32Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// uint64Type embeds int64Type and overrides only the methods whose behavior
// differs for unsigned values (ordering, indexing, and value containers).
type uint64Type struct{ int64Type }

func (t uint64Type) Compare(a, b Value) int {
	return compareUint64(a.uint64(), b.uint64())
}

func (t uint64Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newUint64ColumnIndexer()
}

func (t uint64Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newUint64ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t uint64Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newUint64Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t uint64Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newUint64Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

// BE128 stands for "big-endian 128 bits". This type is used as a special case
// for fixed-length byte arrays of 16 bytes, which are commonly used to
// represent columns of random unique identifiers such as UUIDs.
//
// Comparisons of BE128 values use the natural byte order, the zeroth byte is
// the most significant byte.
//
// The special case is intended to provide optimizations based on the knowledge
// that the values are 16 bytes long. Stronger type checking can also be applied
// by the compiler when using [16]byte values rather than []byte, reducing the
// risk of errors on these common code paths.
type be128Type struct{}

func (t be128Type) String() string { return "FIXED_LEN_BYTE_ARRAY(16)" }

func (t be128Type) Kind() Kind { return FixedLenByteArray }

func (t be128Type) Length() int { return 16 }

func (t be128Type) EstimateSize(n int) int { return 16 * n }

func (t be128Type) EstimateNumValues(n int) int { return n / 16 }

func (t be128Type) Compare(a, b Value) int { return compareBE128(a.be128(), b.be128()) }

func (t be128Type) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder }

func (t be128Type) LogicalType() *format.LogicalType { return nil }

func (t be128Type) ConvertedType() *deprecated.ConvertedType { return nil }

func (t be128Type) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] }

func (t be128Type) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return newBE128ColumnIndexer()
}

func (t be128Type) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return newBE128ColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues))
}

func (t be128Type) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return newBE128Dictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t be128Type) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return newBE128Page(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data)
}

func (t be128Type) NewValues(values []byte, _ []uint32) encoding.Values {
	return encoding.FixedLenByteArrayValues(values, 16)
}

func (t be128Type) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return encoding.EncodeFixedLenByteArray(dst, src, enc)
}

func (t be128Type) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return encoding.DecodeFixedLenByteArray(dst, src, enc)
}

func (t be128Type) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.EstimateSize(numValues)
}

// AssignValue and ConvertValue delegate to the generic fixed-length
// implementation with a hard-coded length of 16.
func (t be128Type) AssignValue(dst reflect.Value, src Value) error {
	return fixedLenByteArrayType{length: 16}.AssignValue(dst, src)
}

func (t be128Type) ConvertValue(val Value, typ Type) (Value, error) {
	return fixedLenByteArrayType{length: 16}.ConvertValue(val, typ)
}

// FixedLenByteArrayType constructs a type for fixed-length values of the given
// size (in bytes).
func FixedLenByteArrayType(length int) Type {
	switch length {
	case 16:
		// 16-byte values get the optimized BE128 implementation.
		return be128Type{}
	default:
		return fixedLenByteArrayType{length: length}
	}
}

// Int constructs a leaf node of signed integer logical type of the given bit
// width.
//
// The bit width must be one of 8, 16, 32, 64, or the function will panic.
func Int(bitWidth int) Node {
	return Leaf(integerType(bitWidth, &signedIntTypes))
}

// Uint constructs a leaf node of unsigned integer logical type of the given
// bit width.
//
// The bit width must be one of 8, 16, 32, 64, or the function will panic.
func Uint(bitWidth int) Node {
	return Leaf(integerType(bitWidth, &unsignedIntTypes))
}

// integerType selects the intType for the given bit width from one of the
// precomputed tables below; it panics on any unsupported width.
func integerType(bitWidth int, types *[4]intType) *intType {
	switch bitWidth {
	case 8:
		return &types[0]
	case 16:
		return &types[1]
	case 32:
		return &types[2]
	case 64:
		return &types[3]
	default:
		panic(fmt.Sprintf("cannot create a %d bits parquet integer node", bitWidth))
	}
}

var signedIntTypes = [...]intType{
	{BitWidth: 8, IsSigned: true},
	{BitWidth: 16, IsSigned: true},
	{BitWidth: 32, IsSigned: true},
	{BitWidth: 64, IsSigned: true},
}

var unsignedIntTypes = [...]intType{
	{BitWidth: 8, IsSigned: false},
	{BitWidth: 16, IsSigned: false},
	{BitWidth: 32, IsSigned: false},
	{BitWidth: 64, IsSigned: false},
}

// intType implements the INT logical type annotation; most methods delegate
// to the physical base type selected by baseType.
type intType format.IntType

// baseType returns the physical Type that backs this logical integer type:
// a 32-bit or 64-bit, signed or unsigned implementation.
func (t *intType) baseType() Type {
	if t.IsSigned {
		if t.BitWidth == 64 {
			return int64Type{}
		} else {
			return int32Type{}
		}
	} else {
		if t.BitWidth == 64 {
			return uint64Type{}
		} else {
			return uint32Type{}
		}
	}
}

func (t *intType) String() string { return (*format.IntType)(t).String() }

func (t *intType) Kind() Kind { return t.baseType().Kind() }

func (t *intType) Length() int { return int(t.BitWidth) }

func (t *intType) EstimateSize(n int) int { return (int(t.BitWidth) / 8) * n }

func (t *intType) EstimateNumValues(n int) int { return n / (int(t.BitWidth) / 8) }

func (t *intType) Compare(a, b Value) int {
	// This code is similar to t.baseType().Compare(a,b) but comparison methods
	// tend to be invoked a lot (e.g. when sorting) so avoiding the interface
	// indirection in this case yields much better throughput in some cases.
	if t.BitWidth == 64 {
		i1 := a.int64()
		i2 := b.int64()
		if t.IsSigned {
			return compareInt64(i1, i2)
		} else {
			return compareUint64(uint64(i1), uint64(i2))
		}
	} else {
		i1 := a.int32()
		i2 := b.int32()
		if t.IsSigned {
			return compareInt32(i1, i2)
		} else {
			return compareUint32(uint32(i1), uint32(i2))
		}
	}
}

func (t *intType) ColumnOrder() *format.ColumnOrder { return t.baseType().ColumnOrder() }

func (t *intType) PhysicalType() *format.Type { return t.baseType().PhysicalType() }

func (t *intType) LogicalType() *format.LogicalType {
	return &format.LogicalType{Integer: (*format.IntType)(t)}
}

func (t *intType) ConvertedType() *deprecated.ConvertedType {
	// bits.Len8 of the byte width maps the bit width to an offset into the
	// run of consecutive integer converted types.
	convertedType := bits.Len8(uint8(t.BitWidth)/8) - 1 // 8=>0, 16=>1, 32=>2, 64=>3
	if t.IsSigned {
		convertedType += int(deprecated.Int8)
	} else {
		convertedType += int(deprecated.Uint8)
	}
	return &convertedTypes[convertedType]
}

func (t *intType) NewColumnIndexer(sizeLimit int) ColumnIndexer {
	return t.baseType().NewColumnIndexer(sizeLimit)
}

func (t *intType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer {
	return t.baseType().NewColumnBuffer(columnIndex, numValues)
}

func (t *intType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary {
	return t.baseType().NewDictionary(columnIndex, numValues, data)
}

func (t *intType) NewPage(columnIndex, numValues int, data encoding.Values) Page {
	return t.baseType().NewPage(columnIndex, numValues, data)
}

func (t *intType) NewValues(values []byte, offsets []uint32) encoding.Values {
	return t.baseType().NewValues(values, offsets)
}

func (t *intType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) {
	return t.baseType().Encode(dst, src, enc)
}

func (t *intType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) {
	return t.baseType().Decode(dst, src, enc)
}

func (t *intType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int {
	return t.baseType().EstimateDecodeSize(numValues, src, enc)
}

func (t *intType) AssignValue(dst reflect.Value, src Value) error {
	if t.BitWidth == 64 {
		return int64Type{}.AssignValue(dst, src)
	} else {
		return int32Type{}.AssignValue(dst, src)
	}
}

func (t *intType) ConvertValue(val Value, typ Type) (Value, error) {
	if t.BitWidth == 64 {
		return int64Type{}.ConvertValue(val, typ)
	} else {
		return int32Type{}.ConvertValue(val, typ)
	}
}

// Decimal constructs a leaf node of decimal logical type with the given
// scale, precision, and underlying type.
//
// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal
func Decimal(scale, precision int, typ Type) Node {
	switch typ.Kind() {
	case Int32, Int64, FixedLenByteArray:
	default:
		panic("DECIMAL node must annotate Int32, Int64 or FixedLenByteArray but got " + typ.String())
	}
	return Leaf(&decimalType{
		decimal: format.DecimalType{
			Scale:     int32(scale),
			Precision: int32(precision),
		},
		Type: typ,
	})
}

// decimalType wraps an underlying physical Type (embedded) and overrides only
// the methods that carry the DECIMAL logical type annotation.
type decimalType struct {
	decimal format.DecimalType
	Type
}

func (t *decimalType) String() string { return t.decimal.String() }

func (t *decimalType) LogicalType() *format.LogicalType {
	return &format.LogicalType{Decimal: &t.decimal}
}

func (t *decimalType) ConvertedType() *deprecated.ConvertedType {
	return &convertedTypes[deprecated.Decimal]
}

// String constructs a leaf node of UTF8 logical type.
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#string func String() Node { return Leaf(&stringType{}) } type stringType format.StringType func (t *stringType) String() string { return (*format.StringType)(t).String() } func (t *stringType) Kind() Kind { return ByteArray } func (t *stringType) Length() int { return 0 } func (t *stringType) EstimateSize(n int) int { return byteArrayType{}.EstimateSize(n) } func (t *stringType) EstimateNumValues(n int) int { return byteArrayType{}.EstimateNumValues(n) } func (t *stringType) Compare(a, b Value) int { return bytes.Compare(a.byteArray(), b.byteArray()) } func (t *stringType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder } func (t *stringType) PhysicalType() *format.Type { return &physicalTypes[ByteArray] } func (t *stringType) LogicalType() *format.LogicalType { return &format.LogicalType{UTF8: (*format.StringType)(t)} } func (t *stringType) ConvertedType() *deprecated.ConvertedType { return &convertedTypes[deprecated.UTF8] } func (t *stringType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return newByteArrayColumnIndexer(sizeLimit) } func (t *stringType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return newByteArrayDictionary(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data) } func (t *stringType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return newByteArrayColumnBuffer(t, makeColumnIndex(columnIndex), makeNumValues(numValues)) } func (t *stringType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return newByteArrayPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues), data) } func (t *stringType) NewValues(values []byte, offsets []uint32) encoding.Values { return encoding.ByteArrayValues(values, offsets) } func (t *stringType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return encoding.EncodeByteArray(dst, src, enc) } func (t *stringType) 
Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return encoding.DecodeByteArray(dst, src, enc) } func (t *stringType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return byteArrayType{}.EstimateDecodeSize(numValues, src, enc) } func (t *stringType) AssignValue(dst reflect.Value, src Value) error { return byteArrayType{}.AssignValue(dst, src) } func (t *stringType) ConvertValue(val Value, typ Type) (Value, error) { switch t2 := typ.(type) { case *dateType: return convertDateToString(val) case *timeType: tz := t2.tz() if t2.Unit.Micros != nil { return convertTimeMicrosToString(val, tz) } else { return convertTimeMillisToString(val, tz) } } switch typ.Kind() { case Boolean: return convertBooleanToString(val) case Int32: return convertInt32ToString(val) case Int64: return convertInt64ToString(val) case Int96: return convertInt96ToString(val) case Float: return convertFloatToString(val) case Double: return convertDoubleToString(val) case ByteArray: return val, nil case FixedLenByteArray: return convertFixedLenByteArrayToString(val) default: return makeValueKind(ByteArray), nil } } // UUID constructs a leaf node of UUID logical type. 
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#uuid func UUID() Node { return Leaf(&uuidType{}) } type uuidType format.UUIDType func (t *uuidType) String() string { return (*format.UUIDType)(t).String() } func (t *uuidType) Kind() Kind { return be128Type{}.Kind() } func (t *uuidType) Length() int { return be128Type{}.Length() } func (t *uuidType) EstimateSize(n int) int { return be128Type{}.EstimateSize(n) } func (t *uuidType) EstimateNumValues(n int) int { return be128Type{}.EstimateNumValues(n) } func (t *uuidType) Compare(a, b Value) int { return be128Type{}.Compare(a, b) } func (t *uuidType) ColumnOrder() *format.ColumnOrder { return &typeDefinedColumnOrder } func (t *uuidType) PhysicalType() *format.Type { return &physicalTypes[FixedLenByteArray] } func (t *uuidType) LogicalType() *format.LogicalType { return &format.LogicalType{UUID: (*format.UUIDType)(t)} } func (t *uuidType) ConvertedType() *deprecated.ConvertedType { return nil } func (t *uuidType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return be128Type{}.NewColumnIndexer(sizeLimit) } func (t *uuidType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return be128Type{}.NewDictionary(columnIndex, numValues, data) } func (t *uuidType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return be128Type{}.NewColumnBuffer(columnIndex, numValues) } func (t *uuidType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return be128Type{}.NewPage(columnIndex, numValues, data) } func (t *uuidType) NewValues(values []byte, offsets []uint32) encoding.Values { return be128Type{}.NewValues(values, offsets) } func (t *uuidType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return be128Type{}.Encode(dst, src, enc) } func (t *uuidType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return be128Type{}.Decode(dst, src, enc) } func (t *uuidType) 
EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return be128Type{}.EstimateDecodeSize(numValues, src, enc) } func (t *uuidType) AssignValue(dst reflect.Value, src Value) error { return be128Type{}.AssignValue(dst, src) } func (t *uuidType) ConvertValue(val Value, typ Type) (Value, error) { return be128Type{}.ConvertValue(val, typ) } // Enum constructs a leaf node with a logical type representing enumerations. // // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#enum func Enum() Node { return Leaf(&enumType{}) } type enumType format.EnumType func (t *enumType) String() string { return (*format.EnumType)(t).String() } func (t *enumType) Kind() Kind { return new(stringType).Kind() } func (t *enumType) Length() int { return new(stringType).Length() } func (t *enumType) EstimateSize(n int) int { return new(stringType).EstimateSize(n) } func (t *enumType) EstimateNumValues(n int) int { return new(stringType).EstimateNumValues(n) } func (t *enumType) Compare(a, b Value) int { return new(stringType).Compare(a, b) } func (t *enumType) ColumnOrder() *format.ColumnOrder { return new(stringType).ColumnOrder() } func (t *enumType) PhysicalType() *format.Type { return new(stringType).PhysicalType() } func (t *enumType) LogicalType() *format.LogicalType { return &format.LogicalType{Enum: (*format.EnumType)(t)} } func (t *enumType) ConvertedType() *deprecated.ConvertedType { return &convertedTypes[deprecated.Enum] } func (t *enumType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return new(stringType).NewColumnIndexer(sizeLimit) } func (t *enumType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return new(stringType).NewDictionary(columnIndex, numValues, data) } func (t *enumType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return new(stringType).NewColumnBuffer(columnIndex, numValues) } func (t *enumType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return 
new(stringType).NewPage(columnIndex, numValues, data) } func (t *enumType) NewValues(values []byte, offsets []uint32) encoding.Values { return new(stringType).NewValues(values, offsets) } func (t *enumType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return new(stringType).Encode(dst, src, enc) } func (t *enumType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return new(stringType).Decode(dst, src, enc) } func (t *enumType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return new(stringType).EstimateDecodeSize(numValues, src, enc) } func (t *enumType) AssignValue(dst reflect.Value, src Value) error { return new(stringType).AssignValue(dst, src) } func (t *enumType) ConvertValue(val Value, typ Type) (Value, error) { switch typ.(type) { case *byteArrayType, *stringType, *enumType: return val, nil default: return val, invalidConversion(val, "ENUM", typ.String()) } } // JSON constructs a leaf node of JSON logical type. 
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#json func JSON() Node { return Leaf(&jsonType{}) } type jsonType format.JsonType func (t *jsonType) String() string { return (*format.JsonType)(t).String() } func (t *jsonType) Kind() Kind { return byteArrayType{}.Kind() } func (t *jsonType) Length() int { return byteArrayType{}.Length() } func (t *jsonType) EstimateSize(n int) int { return byteArrayType{}.EstimateSize(n) } func (t *jsonType) EstimateNumValues(n int) int { return byteArrayType{}.EstimateNumValues(n) } func (t *jsonType) Compare(a, b Value) int { return byteArrayType{}.Compare(a, b) } func (t *jsonType) ColumnOrder() *format.ColumnOrder { return byteArrayType{}.ColumnOrder() } func (t *jsonType) PhysicalType() *format.Type { return byteArrayType{}.PhysicalType() } func (t *jsonType) LogicalType() *format.LogicalType { return &format.LogicalType{Json: (*format.JsonType)(t)} } func (t *jsonType) ConvertedType() *deprecated.ConvertedType { return &convertedTypes[deprecated.Json] } func (t *jsonType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return byteArrayType{}.NewColumnIndexer(sizeLimit) } func (t *jsonType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return byteArrayType{}.NewDictionary(columnIndex, numValues, data) } func (t *jsonType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return byteArrayType{}.NewColumnBuffer(columnIndex, numValues) } func (t *jsonType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return byteArrayType{}.NewPage(columnIndex, numValues, data) } func (t *jsonType) NewValues(values []byte, offsets []uint32) encoding.Values { return byteArrayType{}.NewValues(values, offsets) } func (t *jsonType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return byteArrayType{}.Encode(dst, src, enc) } func (t *jsonType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { 
return byteArrayType{}.Decode(dst, src, enc) } func (t *jsonType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return byteArrayType{}.EstimateDecodeSize(numValues, src, enc) } func (t *jsonType) AssignValue(dst reflect.Value, src Value) error { // Assign value using ByteArrayType for BC... switch dst.Kind() { case reflect.String: return byteArrayType{}.AssignValue(dst, src) case reflect.Slice: if dst.Type().Elem().Kind() == reflect.Uint8 { return byteArrayType{}.AssignValue(dst, src) } } // Otherwise handle with json.Unmarshal b := src.byteArray() val := reflect.New(dst.Type()).Elem() err := json.Unmarshal(b, val.Addr().Interface()) if err != nil { return err } dst.Set(val) return nil } func (t *jsonType) ConvertValue(val Value, typ Type) (Value, error) { switch typ.(type) { case *byteArrayType, *stringType, *jsonType: return val, nil default: return val, invalidConversion(val, "JSON", typ.String()) } } // BSON constructs a leaf node of BSON logical type. // // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#bson func BSON() Node { return Leaf(&bsonType{}) } type bsonType format.BsonType func (t *bsonType) String() string { return (*format.BsonType)(t).String() } func (t *bsonType) Kind() Kind { return byteArrayType{}.Kind() } func (t *bsonType) Length() int { return byteArrayType{}.Length() } func (t *bsonType) EstimateSize(n int) int { return byteArrayType{}.EstimateSize(n) } func (t *bsonType) EstimateNumValues(n int) int { return byteArrayType{}.EstimateNumValues(n) } func (t *bsonType) Compare(a, b Value) int { return byteArrayType{}.Compare(a, b) } func (t *bsonType) ColumnOrder() *format.ColumnOrder { return byteArrayType{}.ColumnOrder() } func (t *bsonType) PhysicalType() *format.Type { return byteArrayType{}.PhysicalType() } func (t *bsonType) LogicalType() *format.LogicalType { return &format.LogicalType{Bson: (*format.BsonType)(t)} } func (t *bsonType) ConvertedType() *deprecated.ConvertedType { return 
&convertedTypes[deprecated.Bson] } func (t *bsonType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return byteArrayType{}.NewColumnIndexer(sizeLimit) } func (t *bsonType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return byteArrayType{}.NewDictionary(columnIndex, numValues, data) } func (t *bsonType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return byteArrayType{}.NewColumnBuffer(columnIndex, numValues) } func (t *bsonType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return byteArrayType{}.NewPage(columnIndex, numValues, data) } func (t *bsonType) NewValues(values []byte, offsets []uint32) encoding.Values { return byteArrayType{}.NewValues(values, offsets) } func (t *bsonType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return byteArrayType{}.Encode(dst, src, enc) } func (t *bsonType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return byteArrayType{}.Decode(dst, src, enc) } func (t *bsonType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return byteArrayType{}.EstimateDecodeSize(numValues, src, enc) } func (t *bsonType) AssignValue(dst reflect.Value, src Value) error { return byteArrayType{}.AssignValue(dst, src) } func (t *bsonType) ConvertValue(val Value, typ Type) (Value, error) { switch typ.(type) { case *byteArrayType, *bsonType: return val, nil default: return val, invalidConversion(val, "BSON", typ.String()) } } // Date constructs a leaf node of DATE logical type. 
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date func Date() Node { return Leaf(&dateType{}) } type dateType format.DateType func (t *dateType) String() string { return (*format.DateType)(t).String() } func (t *dateType) Kind() Kind { return int32Type{}.Kind() } func (t *dateType) Length() int { return int32Type{}.Length() } func (t *dateType) EstimateSize(n int) int { return int32Type{}.EstimateSize(n) } func (t *dateType) EstimateNumValues(n int) int { return int32Type{}.EstimateNumValues(n) } func (t *dateType) Compare(a, b Value) int { return int32Type{}.Compare(a, b) } func (t *dateType) ColumnOrder() *format.ColumnOrder { return int32Type{}.ColumnOrder() } func (t *dateType) PhysicalType() *format.Type { return int32Type{}.PhysicalType() } func (t *dateType) LogicalType() *format.LogicalType { return &format.LogicalType{Date: (*format.DateType)(t)} } func (t *dateType) ConvertedType() *deprecated.ConvertedType { return &convertedTypes[deprecated.Date] } func (t *dateType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return int32Type{}.NewColumnIndexer(sizeLimit) } func (t *dateType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return int32Type{}.NewDictionary(columnIndex, numValues, data) } func (t *dateType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return int32Type{}.NewColumnBuffer(columnIndex, numValues) } func (t *dateType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return int32Type{}.NewPage(columnIndex, numValues, data) } func (t *dateType) NewValues(values []byte, offsets []uint32) encoding.Values { return int32Type{}.NewValues(values, offsets) } func (t *dateType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return int32Type{}.Encode(dst, src, enc) } func (t *dateType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return int32Type{}.Decode(dst, src, enc) } func (t 
*dateType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return int32Type{}.EstimateDecodeSize(numValues, src, enc) } func (t *dateType) AssignValue(dst reflect.Value, src Value) error { return int32Type{}.AssignValue(dst, src) } func (t *dateType) ConvertValue(val Value, typ Type) (Value, error) { switch src := typ.(type) { case *stringType: return convertStringToDate(val, time.UTC) case *timestampType: return convertTimestampToDate(val, src.Unit, src.tz()) } return int32Type{}.ConvertValue(val, typ) } // TimeUnit represents units of time in the parquet type system. type TimeUnit interface { // Returns the precision of the time unit as a time.Duration value. Duration() time.Duration // Converts the TimeUnit value to its representation in the parquet thrift // format. TimeUnit() format.TimeUnit } var ( Millisecond TimeUnit = &millisecond{} Microsecond TimeUnit = µsecond{} Nanosecond TimeUnit = &nanosecond{} ) type millisecond format.MilliSeconds func (u *millisecond) Duration() time.Duration { return time.Millisecond } func (u *millisecond) TimeUnit() format.TimeUnit { return format.TimeUnit{Millis: (*format.MilliSeconds)(u)} } type microsecond format.MicroSeconds func (u *microsecond) Duration() time.Duration { return time.Microsecond } func (u *microsecond) TimeUnit() format.TimeUnit { return format.TimeUnit{Micros: (*format.MicroSeconds)(u)} } type nanosecond format.NanoSeconds func (u *nanosecond) Duration() time.Duration { return time.Nanosecond } func (u *nanosecond) TimeUnit() format.TimeUnit { return format.TimeUnit{Nanos: (*format.NanoSeconds)(u)} } // Time constructs a leaf node of TIME logical type. 
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#time func Time(unit TimeUnit) Node { return Leaf(&timeType{IsAdjustedToUTC: true, Unit: unit.TimeUnit()}) } type timeType format.TimeType func (t *timeType) tz() *time.Location { if t.IsAdjustedToUTC { return time.UTC } else { return time.Local } } func (t *timeType) baseType() Type { if t.useInt32() { return int32Type{} } else { return int64Type{} } } func (t *timeType) useInt32() bool { return t.Unit.Millis != nil } func (t *timeType) useInt64() bool { return t.Unit.Micros != nil } func (t *timeType) String() string { return (*format.TimeType)(t).String() } func (t *timeType) Kind() Kind { return t.baseType().Kind() } func (t *timeType) Length() int { return t.baseType().Length() } func (t *timeType) EstimateSize(n int) int { return t.baseType().EstimateSize(n) } func (t *timeType) EstimateNumValues(n int) int { return t.baseType().EstimateNumValues(n) } func (t *timeType) Compare(a, b Value) int { return t.baseType().Compare(a, b) } func (t *timeType) ColumnOrder() *format.ColumnOrder { return t.baseType().ColumnOrder() } func (t *timeType) PhysicalType() *format.Type { return t.baseType().PhysicalType() } func (t *timeType) LogicalType() *format.LogicalType { return &format.LogicalType{Time: (*format.TimeType)(t)} } func (t *timeType) ConvertedType() *deprecated.ConvertedType { switch { case t.useInt32(): return &convertedTypes[deprecated.TimeMillis] case t.useInt64(): return &convertedTypes[deprecated.TimeMicros] default: return nil } } func (t *timeType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return t.baseType().NewColumnIndexer(sizeLimit) } func (t *timeType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return t.baseType().NewColumnBuffer(columnIndex, numValues) } func (t *timeType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return t.baseType().NewDictionary(columnIndex, numValues, data) } func (t *timeType) NewPage(columnIndex, 
numValues int, data encoding.Values) Page { return t.baseType().NewPage(columnIndex, numValues, data) } func (t *timeType) NewValues(values []byte, offset []uint32) encoding.Values { return t.baseType().NewValues(values, offset) } func (t *timeType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return t.baseType().Encode(dst, src, enc) } func (t *timeType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return t.baseType().Decode(dst, src, enc) } func (t *timeType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return t.baseType().EstimateDecodeSize(numValues, src, enc) } func (t *timeType) AssignValue(dst reflect.Value, src Value) error { return t.baseType().AssignValue(dst, src) } func (t *timeType) ConvertValue(val Value, typ Type) (Value, error) { switch src := typ.(type) { case *stringType: tz := t.tz() if t.Unit.Micros != nil { return convertStringToTimeMicros(val, tz) } else { return convertStringToTimeMillis(val, tz) } case *timestampType: tz := t.tz() if t.Unit.Micros != nil { return convertTimestampToTimeMicros(val, src.Unit, src.tz(), tz) } else { return convertTimestampToTimeMillis(val, src.Unit, src.tz(), tz) } } return t.baseType().ConvertValue(val, typ) } // Timestamp constructs of leaf node of TIMESTAMP logical type. 
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#timestamp func Timestamp(unit TimeUnit) Node { return Leaf(×tampType{IsAdjustedToUTC: true, Unit: unit.TimeUnit()}) } type timestampType format.TimestampType func (t *timestampType) tz() *time.Location { if t.IsAdjustedToUTC { return time.UTC } else { return time.Local } } func (t *timestampType) String() string { return (*format.TimestampType)(t).String() } func (t *timestampType) Kind() Kind { return int64Type{}.Kind() } func (t *timestampType) Length() int { return int64Type{}.Length() } func (t *timestampType) EstimateSize(n int) int { return int64Type{}.EstimateSize(n) } func (t *timestampType) EstimateNumValues(n int) int { return int64Type{}.EstimateNumValues(n) } func (t *timestampType) Compare(a, b Value) int { return int64Type{}.Compare(a, b) } func (t *timestampType) ColumnOrder() *format.ColumnOrder { return int64Type{}.ColumnOrder() } func (t *timestampType) PhysicalType() *format.Type { return int64Type{}.PhysicalType() } func (t *timestampType) LogicalType() *format.LogicalType { return &format.LogicalType{Timestamp: (*format.TimestampType)(t)} } func (t *timestampType) ConvertedType() *deprecated.ConvertedType { switch { case t.Unit.Millis != nil: return &convertedTypes[deprecated.TimestampMillis] case t.Unit.Micros != nil: return &convertedTypes[deprecated.TimestampMicros] default: return nil } } func (t *timestampType) NewColumnIndexer(sizeLimit int) ColumnIndexer { return int64Type{}.NewColumnIndexer(sizeLimit) } func (t *timestampType) NewDictionary(columnIndex, numValues int, data encoding.Values) Dictionary { return int64Type{}.NewDictionary(columnIndex, numValues, data) } func (t *timestampType) NewColumnBuffer(columnIndex, numValues int) ColumnBuffer { return int64Type{}.NewColumnBuffer(columnIndex, numValues) } func (t *timestampType) NewPage(columnIndex, numValues int, data encoding.Values) Page { return int64Type{}.NewPage(columnIndex, numValues, data) } func (t 
*timestampType) NewValues(values []byte, offsets []uint32) encoding.Values { return int64Type{}.NewValues(values, offsets) } func (t *timestampType) Encode(dst []byte, src encoding.Values, enc encoding.Encoding) ([]byte, error) { return int64Type{}.Encode(dst, src, enc) } func (t *timestampType) Decode(dst encoding.Values, src []byte, enc encoding.Encoding) (encoding.Values, error) { return int64Type{}.Decode(dst, src, enc) } func (t *timestampType) EstimateDecodeSize(numValues int, src []byte, enc encoding.Encoding) int { return int64Type{}.EstimateDecodeSize(numValues, src, enc) } func (t *timestampType) AssignValue(dst reflect.Value, src Value) error { switch dst.Type() { case reflect.TypeOf(time.Time{}): unit := Nanosecond.TimeUnit() lt := t.LogicalType() if lt != nil && lt.Timestamp != nil { unit = lt.Timestamp.Unit } nanos := src.int64() switch { case unit.Millis != nil: nanos = nanos * 1e6 case unit.Micros != nil: nanos = nanos * 1e3 } val := time.Unix(0, nanos).UTC() dst.Set(reflect.ValueOf(val)) return nil default: return int64Type{}.AssignValue(dst, src) } } func (t *timestampType) ConvertValue(val Value, typ Type) (Value, error) { switch src := typ.(type) { case *timestampType: return convertTimestampToTimestamp(val, src.Unit, t.Unit) case *dateType: return convertDateToTimestamp(val, t.Unit, t.tz()) } return int64Type{}.ConvertValue(val, typ) } // List constructs a node of LIST logical type. 
// // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists func List(of Node) Node { return listNode{Group{"list": Repeated(Group{"element": of})}} } type listNode struct{ Group } func (listNode) Type() Type { return &listType{} } type listType format.ListType func (t *listType) String() string { return (*format.ListType)(t).String() } func (t *listType) Kind() Kind { panic("cannot call Kind on parquet LIST type") } func (t *listType) Length() int { return 0 } func (t *listType) EstimateSize(int) int { return 0 } func (t *listType) EstimateNumValues(int) int { return 0 } func (t *listType) Compare(Value, Value) int { panic("cannot compare values on parquet LIST type") } func (t *listType) ColumnOrder() *format.ColumnOrder { return nil } func (t *listType) PhysicalType() *format.Type { return nil } func (t *listType) LogicalType() *format.LogicalType { return &format.LogicalType{List: (*format.ListType)(t)} } func (t *listType) ConvertedType() *deprecated.ConvertedType { return &convertedTypes[deprecated.List] } func (t *listType) NewColumnIndexer(int) ColumnIndexer { panic("create create column indexer from parquet LIST type") } func (t *listType) NewDictionary(int, int, encoding.Values) Dictionary { panic("cannot create dictionary from parquet LIST type") } func (t *listType) NewColumnBuffer(int, int) ColumnBuffer { panic("cannot create column buffer from parquet LIST type") } func (t *listType) NewPage(int, int, encoding.Values) Page { panic("cannot create page from parquet LIST type") } func (t *listType) NewValues(values []byte, _ []uint32) encoding.Values { panic("cannot create values from parquet LIST type") } func (t *listType) Encode(_ []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) { panic("cannot encode parquet LIST type") } func (t *listType) Decode(_ encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) { panic("cannot decode parquet LIST type") } func (t *listType) EstimateDecodeSize(_ int, _ 
[]byte, _ encoding.Encoding) int { panic("cannot estimate decode size of parquet LIST type") } func (t *listType) AssignValue(reflect.Value, Value) error { panic("cannot assign value to a parquet LIST type") } func (t *listType) ConvertValue(Value, Type) (Value, error) { panic("cannot convert value to a parquet LIST type") } // Map constructs a node of MAP logical type. // // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps func Map(key, value Node) Node { return mapNode{Group{ "key_value": Repeated(Group{ "key": Required(key), "value": value, }), }} } type mapNode struct{ Group } func (mapNode) Type() Type { return &mapType{} } type mapType format.MapType func (t *mapType) String() string { return (*format.MapType)(t).String() } func (t *mapType) Kind() Kind { panic("cannot call Kind on parquet MAP type") } func (t *mapType) Length() int { return 0 } func (t *mapType) EstimateSize(int) int { return 0 } func (t *mapType) EstimateNumValues(int) int { return 0 } func (t *mapType) Compare(Value, Value) int { panic("cannot compare values on parquet MAP type") } func (t *mapType) ColumnOrder() *format.ColumnOrder { return nil } func (t *mapType) PhysicalType() *format.Type { return nil } func (t *mapType) LogicalType() *format.LogicalType { return &format.LogicalType{Map: (*format.MapType)(t)} } func (t *mapType) ConvertedType() *deprecated.ConvertedType { return &convertedTypes[deprecated.Map] } func (t *mapType) NewColumnIndexer(int) ColumnIndexer { panic("create create column indexer from parquet MAP type") } func (t *mapType) NewDictionary(int, int, encoding.Values) Dictionary { panic("cannot create dictionary from parquet MAP type") } func (t *mapType) NewColumnBuffer(int, int) ColumnBuffer { panic("cannot create column buffer from parquet MAP type") } func (t *mapType) NewPage(int, int, encoding.Values) Page { panic("cannot create page from parquet MAP type") } func (t *mapType) NewValues(values []byte, _ []uint32) encoding.Values { 
panic("cannot create values from parquet MAP type") } func (t *mapType) Encode(_ []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) { panic("cannot encode parquet MAP type") } func (t *mapType) Decode(_ encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) { panic("cannot decode parquet MAP type") } func (t *mapType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int { panic("cannot estimate decode size of parquet MAP type") } func (t *mapType) AssignValue(reflect.Value, Value) error { panic("cannot assign value to a parquet MAP type") } func (t *mapType) ConvertValue(Value, Type) (Value, error) { panic("cannot convert value to a parquet MAP type") } type nullType format.NullType func (t *nullType) String() string { return (*format.NullType)(t).String() } func (t *nullType) Kind() Kind { return -1 } func (t *nullType) Length() int { return 0 } func (t *nullType) EstimateSize(int) int { return 0 } func (t *nullType) EstimateNumValues(int) int { return 0 } func (t *nullType) Compare(Value, Value) int { panic("cannot compare values on parquet NULL type") } func (t *nullType) ColumnOrder() *format.ColumnOrder { return nil } func (t *nullType) PhysicalType() *format.Type { return nil } func (t *nullType) LogicalType() *format.LogicalType { return &format.LogicalType{Unknown: (*format.NullType)(t)} } func (t *nullType) ConvertedType() *deprecated.ConvertedType { return nil } func (t *nullType) NewColumnIndexer(int) ColumnIndexer { panic("create create column indexer from parquet NULL type") } func (t *nullType) NewDictionary(int, int, encoding.Values) Dictionary { panic("cannot create dictionary from parquet NULL type") } func (t *nullType) NewColumnBuffer(int, int) ColumnBuffer { panic("cannot create column buffer from parquet NULL type") } func (t *nullType) NewPage(columnIndex, numValues int, _ encoding.Values) Page { return newNullPage(t, makeColumnIndex(columnIndex), makeNumValues(numValues)) } func (t *nullType) NewValues(_ 
[]byte, _ []uint32) encoding.Values { return encoding.Values{} } func (t *nullType) Encode(dst []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) { return dst[:0], nil } func (t *nullType) Decode(dst encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) { return dst, nil } func (t *nullType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int { return 0 } func (t *nullType) AssignValue(reflect.Value, Value) error { return nil } func (t *nullType) ConvertValue(val Value, _ Type) (Value, error) { return val, nil } type groupType struct{} func (groupType) String() string { return "group" } func (groupType) Kind() Kind { panic("cannot call Kind on parquet group") } func (groupType) Compare(Value, Value) int { panic("cannot compare values on parquet group") } func (groupType) NewColumnIndexer(int) ColumnIndexer { panic("cannot create column indexer from parquet group") } func (groupType) NewDictionary(int, int, encoding.Values) Dictionary { panic("cannot create dictionary from parquet group") } func (t groupType) NewColumnBuffer(int, int) ColumnBuffer { panic("cannot create column buffer from parquet group") } func (t groupType) NewPage(int, int, encoding.Values) Page { panic("cannot create page from parquet group") } func (t groupType) NewValues(_ []byte, _ []uint32) encoding.Values { panic("cannot create values from parquet group") } func (groupType) Encode(_ []byte, _ encoding.Values, _ encoding.Encoding) ([]byte, error) { panic("cannot encode parquet group") } func (groupType) Decode(_ encoding.Values, _ []byte, _ encoding.Encoding) (encoding.Values, error) { panic("cannot decode parquet group") } func (groupType) EstimateDecodeSize(_ int, _ []byte, _ encoding.Encoding) int { panic("cannot estimate decode size of parquet group") } func (groupType) AssignValue(reflect.Value, Value) error { panic("cannot assign value to a parquet group") } func (t groupType) ConvertValue(Value, Type) (Value, error) { panic("cannot convert value 
to a parquet group") } func (groupType) Length() int { return 0 } func (groupType) EstimateSize(int) int { return 0 } func (groupType) EstimateNumValues(int) int { return 0 } func (groupType) ColumnOrder() *format.ColumnOrder { return nil } func (groupType) PhysicalType() *format.Type { return nil } func (groupType) LogicalType() *format.LogicalType { return nil } func (groupType) ConvertedType() *deprecated.ConvertedType { return nil } func checkTypeKindEqual(to, from Type) error { if to.Kind() != from.Kind() { return fmt.Errorf("cannot convert from parquet value of type %s to %s", from, to) } return nil } ================================================ FILE: value.go ================================================ package parquet import ( "bytes" "encoding/binary" "fmt" "io" "math" "reflect" "strconv" "time" "unsafe" "github.com/google/uuid" "github.com/segmentio/parquet-go/deprecated" "github.com/segmentio/parquet-go/format" "github.com/segmentio/parquet-go/internal/unsafecast" ) const ( // 170 x sizeof(Value) = 4KB defaultValueBufferSize = 170 ) // The Value type is similar to the reflect.Value abstraction of Go values, but // for parquet values. Value instances wrap underlying Go values mapped to one // of the parquet physical types. // // Value instances are small, immutable objects, and usually passed by value // between function calls. // // The zero-value of Value represents the null parquet value. type Value struct { // data ptr *byte u64 uint64 // type kind int8 // XOR(Kind) so the zero-value is // levels definitionLevel byte repetitionLevel byte columnIndex int16 // XOR so the zero-value is -1 } // ValueReader is an interface implemented by types that support reading // batches of values. type ValueReader interface { // Read values into the buffer passed as argument and return the number of // values read. When all values have been read, the error will be io.EOF. 
ReadValues([]Value) (int, error) } // ValueReaderAt is an interface implemented by types that support reading // values at offsets specified by the application. type ValueReaderAt interface { ReadValuesAt([]Value, int64) (int, error) } // ValueReaderFrom is an interface implemented by value writers to read values // from a reader. type ValueReaderFrom interface { ReadValuesFrom(ValueReader) (int64, error) } // ValueWriter is an interface implemented by types that support reading // batches of values. type ValueWriter interface { // Write values from the buffer passed as argument and returns the number // of values written. WriteValues([]Value) (int, error) } // ValueWriterTo is an interface implemented by value readers to write values to // a writer. type ValueWriterTo interface { WriteValuesTo(ValueWriter) (int64, error) } // ValueReaderFunc is a function type implementing the ValueReader interface. type ValueReaderFunc func([]Value) (int, error) func (f ValueReaderFunc) ReadValues(values []Value) (int, error) { return f(values) } // ValueWriterFunc is a function type implementing the ValueWriter interface. type ValueWriterFunc func([]Value) (int, error) func (f ValueWriterFunc) WriteValues(values []Value) (int, error) { return f(values) } // CopyValues copies values from src to dst, returning the number of values // that were written. // // As an optimization, the reader and writer may choose to implement // ValueReaderFrom and ValueWriterTo to provide their own copy logic. // // The function returns any error it encounters reading or writing pages, except // for io.EOF from the reader which indicates that there were no more values to // read. 
func CopyValues(dst ValueWriter, src ValueReader) (int64, error) { return copyValues(dst, src, nil) } func copyValues(dst ValueWriter, src ValueReader, buf []Value) (written int64, err error) { if wt, ok := src.(ValueWriterTo); ok { return wt.WriteValuesTo(dst) } if rf, ok := dst.(ValueReaderFrom); ok { return rf.ReadValuesFrom(src) } if len(buf) == 0 { buf = make([]Value, defaultValueBufferSize) } defer clearValues(buf) for { n, err := src.ReadValues(buf) if n > 0 { wn, werr := dst.WriteValues(buf[:n]) written += int64(wn) if werr != nil { return written, werr } } if err != nil { if err == io.EOF { err = nil } return written, err } if n == 0 { return written, io.ErrNoProgress } } } // ValueOf constructs a parquet value from a Go value v. // // The physical type of the value is assumed from the Go type of v using the // following conversion table: // // Go type | Parquet physical type // ------- | --------------------- // nil | NULL // bool | BOOLEAN // int8 | INT32 // int16 | INT32 // int32 | INT32 // int64 | INT64 // int | INT64 // uint8 | INT32 // uint16 | INT32 // uint32 | INT32 // uint64 | INT64 // uintptr | INT64 // float32 | FLOAT // float64 | DOUBLE // string | BYTE_ARRAY // []byte | BYTE_ARRAY // [*]byte | FIXED_LEN_BYTE_ARRAY // // When converting a []byte or [*]byte value, the underlying byte array is not // copied; instead, the returned parquet value holds a reference to it. // // The repetition and definition levels of the returned value are both zero. // // The function panics if the Go value cannot be represented in parquet. 
func ValueOf(v interface{}) Value { k := Kind(-1) t := reflect.TypeOf(v) switch value := v.(type) { case nil: return Value{} case uuid.UUID: return makeValueBytes(FixedLenByteArray, value[:]) case deprecated.Int96: return makeValueInt96(value) case time.Time: k = Int64 } switch t.Kind() { case reflect.Bool: k = Boolean case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Uint8, reflect.Uint16, reflect.Uint32: k = Int32 case reflect.Int64, reflect.Int, reflect.Uint64, reflect.Uint, reflect.Uintptr: k = Int64 case reflect.Float32: k = Float case reflect.Float64: k = Double case reflect.String: k = ByteArray case reflect.Slice: if t.Elem().Kind() == reflect.Uint8 { k = ByteArray } case reflect.Array: if t.Elem().Kind() == reflect.Uint8 { k = FixedLenByteArray } } if k < 0 { panic("cannot create parquet value from go value of type " + t.String()) } return makeValue(k, nil, reflect.ValueOf(v)) } // NulLValue constructs a null value, which is the zero-value of the Value type. func NullValue() Value { return Value{} } // ZeroValue constructs a zero value of the given kind. func ZeroValue(kind Kind) Value { return makeValueKind(kind) } // BooleanValue constructs a BOOLEAN parquet value from the bool passed as // argument. func BooleanValue(value bool) Value { return makeValueBoolean(value) } // Int32Value constructs a INT32 parquet value from the int32 passed as // argument. func Int32Value(value int32) Value { return makeValueInt32(value) } // Int64Value constructs a INT64 parquet value from the int64 passed as // argument. func Int64Value(value int64) Value { return makeValueInt64(value) } // Int96Value constructs a INT96 parquet value from the deprecated.Int96 passed // as argument. func Int96Value(value deprecated.Int96) Value { return makeValueInt96(value) } // FloatValue constructs a FLOAT parquet value from the float32 passed as // argument. 
func FloatValue(value float32) Value { return makeValueFloat(value) }

// DoubleValue constructs a DOUBLE parquet value from the float64 passed as
// argument.
func DoubleValue(value float64) Value { return makeValueDouble(value) }

// ByteArrayValue constructs a BYTE_ARRAY parquet value from the byte slice
// passed as argument.
func ByteArrayValue(value []byte) Value { return makeValueBytes(ByteArray, value) }

// FixedLenByteArrayValue constructs a FIXED_LEN_BYTE_ARRAY parquet value from
// the byte slice passed as argument.
func FixedLenByteArrayValue(value []byte) Value { return makeValueBytes(FixedLenByteArray, value) }

// makeValue constructs a Value of physical type k from the reflect.Value v.
//
// lt optionally carries the logical type of the column the value is built for;
// it is only consulted for time.Time inputs, to pick the timestamp unit.
//
// The function panics when v cannot be represented as a value of kind k.
func makeValue(k Kind, lt *format.LogicalType, v reflect.Value) Value {
	switch v.Type() {
	case reflect.TypeOf(time.Time{}):
		// Default to nanosecond precision unless the logical type of the
		// column specifies a different timestamp unit.
		unit := Nanosecond.TimeUnit()
		if lt != nil && lt.Timestamp != nil {
			unit = lt.Timestamp.Unit
		}

		t := v.Interface().(time.Time)
		var val int64
		switch {
		case unit.Millis != nil:
			val = t.UnixMilli()
		case unit.Micros != nil:
			val = t.UnixMicro()
		default:
			val = t.UnixNano()
		}
		return makeValueInt64(val)
	}

	switch k {
	case Boolean:
		return makeValueBoolean(v.Bool())

	case Int32:
		switch v.Kind() {
		case reflect.Int8, reflect.Int16, reflect.Int32:
			return makeValueInt32(int32(v.Int()))
		case reflect.Uint8, reflect.Uint16, reflect.Uint32:
			return makeValueInt32(int32(v.Uint()))
		}

	case Int64:
		switch v.Kind() {
		case reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Int:
			return makeValueInt64(v.Int())
		case reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uint, reflect.Uintptr:
			return makeValueUint64(v.Uint())
		}

	case Int96:
		switch v.Type() {
		case reflect.TypeOf(deprecated.Int96{}):
			return makeValueInt96(v.Interface().(deprecated.Int96))
		}

	case Float:
		switch v.Kind() {
		case reflect.Float32:
			return makeValueFloat(float32(v.Float()))
		}

	case Double:
		switch v.Kind() {
		case reflect.Float32, reflect.Float64:
			return makeValueDouble(v.Float())
		}

	case ByteArray:
		switch v.Kind() {
		case reflect.String:
			return makeValueString(k, v.String())
		case reflect.Slice:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueBytes(k, v.Bytes())
			}
		}

	case FixedLenByteArray:
		switch v.Kind() {
		case reflect.String: // uuid
			return makeValueString(k, v.String())
		case reflect.Array:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueFixedLenByteArray(v)
			}
		case reflect.Slice:
			if v.Type().Elem().Kind() == reflect.Uint8 {
				return makeValueBytes(k, v.Bytes())
			}
		}
	}

	panic("cannot create parquet value of type " + k.String() + " from go value of type " + v.Type().String())
}

// makeValueKind returns a non-null zero value of the given kind; the kind is
// stored bit-inverted so that the zero Value remains the null value.
func makeValueKind(kind Kind) Value { return Value{kind: ^int8(kind)} }

// makeValueBoolean stores the bool as 0 or 1 in the u64 field.
func makeValueBoolean(value bool) Value {
	v := Value{kind: ^int8(Boolean)}
	if value {
		v.u64 = 1
	}
	return v
}

func makeValueInt32(value int32) Value {
	return Value{
		kind: ^int8(Int32),
		u64:  uint64(value),
	}
}

func makeValueInt64(value int64) Value {
	return Value{
		kind: ^int8(Int64),
		u64:  uint64(value),
	}
}

func makeValueInt96(value deprecated.Int96) Value {
	// TODO: this is highly inefficient because we need a heap allocation to
	// store the value; we don't expect INT96 to be used frequently since it
	// is a deprecated feature of parquet, and it helps keep the Value type
	// compact for all the other more common cases.
	bits := [12]byte{}
	binary.LittleEndian.PutUint32(bits[0:4], value[0])
	binary.LittleEndian.PutUint32(bits[4:8], value[1])
	binary.LittleEndian.PutUint32(bits[8:12], value[2])
	return Value{
		kind: ^int8(Int96),
		ptr:  &bits[0],
		u64:  12, // set the length so we can use the ByteArray method
	}
}

func makeValueUint32(value uint32) Value {
	return Value{
		kind: ^int8(Int32),
		u64:  uint64(value),
	}
}

func makeValueUint64(value uint64) Value {
	return Value{
		kind: ^int8(Int64),
		u64:  value,
	}
}

// makeValueFloat stores the IEEE 754 bit pattern of the float32 in u64.
func makeValueFloat(value float32) Value {
	return Value{
		kind: ^int8(Float),
		u64:  uint64(math.Float32bits(value)),
	}
}

// makeValueDouble stores the IEEE 754 bit pattern of the float64 in u64.
func makeValueDouble(value float64) Value {
	return Value{
		kind: ^int8(Double),
		u64:  math.Float64bits(value),
	}
}

// makeValueBytes references the slice's backing array without copying it.
func makeValueBytes(kind Kind, value []byte) Value {
	return makeValueByteArray(kind, unsafecast.AddressOfBytes(value), len(value))
}

// makeValueString references the string's backing array without copying it.
func makeValueString(kind Kind, value string) Value {
	return makeValueByteArray(kind, unsafecast.AddressOfString(value), len(value))
}

// makeValueFixedLenByteArray builds a FIXED_LEN_BYTE_ARRAY value referencing
// the bytes of the Go array held in v.
func makeValueFixedLenByteArray(v reflect.Value) Value {
	t := v.Type()
	// When the array is addressable, we take advantage of this
	// condition to avoid the heap allocation otherwise needed
	// to pack the reference into an interface{} value.
	if v.CanAddr() {
		v = v.Addr()
	} else {
		u := reflect.New(t)
		u.Elem().Set(v)
		v = u
	}
	return makeValueByteArray(FixedLenByteArray, (*byte)(unsafePointer(v)), t.Len())
}

// makeValueByteArray is the common constructor for BYTE_ARRAY,
// FIXED_LEN_BYTE_ARRAY, and INT96 values: ptr references the data and u64
// holds its length in bytes.
func makeValueByteArray(kind Kind, data *byte, size int) Value {
	return Value{
		kind: ^int8(kind),
		ptr:  data,
		u64:  uint64(size),
	}
}

// These methods are internal versions of methods exported by the Value type,
// they are usually inlined by the compiler and intended to be used inside the
// parquet-go package because they tend to generate better code than their
// exported counter part, which requires making a copy of the receiver.
// isNull reports whether v is the null value; the kind field is stored
// bit-inverted, so the zero byte means null.
func (v *Value) isNull() bool { return v.kind == 0 }

func (v *Value) byte() byte { return byte(v.u64) }

func (v *Value) boolean() bool { return v.u64 != 0 }

func (v *Value) int32() int32 { return int32(v.u64) }

func (v *Value) int64() int64 { return int64(v.u64) }

// int96 decodes the heap-allocated 12-byte buffer referenced by ptr.
func (v *Value) int96() deprecated.Int96 { return makeInt96(v.byteArray()) }

func (v *Value) float() float32 { return math.Float32frombits(uint32(v.u64)) }

func (v *Value) double() float64 { return math.Float64frombits(uint64(v.u64)) }

func (v *Value) uint32() uint32 { return uint32(v.u64) }

func (v *Value) uint64() uint64 { return v.u64 }

// byteArray reinterprets ptr/u64 as a byte slice without copying.
func (v *Value) byteArray() []byte { return unsafecast.Bytes(v.ptr, int(v.u64)) }

// string reinterprets the byte array as a string without copying.
func (v *Value) string() string { return unsafecast.BytesToString(v.byteArray()) }

// be128 reinterprets ptr as a 16-byte array; only valid when the value holds
// at least 16 bytes.
func (v *Value) be128() *[16]byte { return (*[16]byte)(unsafe.Pointer(v.ptr)) }

// column recovers the column index; it is stored bit-inverted so that the
// zero value maps to -1 (no column).
func (v *Value) column() int { return int(^v.columnIndex) }

// The convertTo* methods below rewrite the data fields of v in place while
// preserving its repetition level, definition level, and column index.

func (v Value) convertToBoolean(x bool) Value {
	v.kind = ^int8(Boolean)
	v.ptr = nil
	v.u64 = 0
	if x {
		v.u64 = 1
	}
	return v
}

func (v Value) convertToInt32(x int32) Value {
	v.kind = ^int8(Int32)
	v.ptr = nil
	v.u64 = uint64(x)
	return v
}

func (v Value) convertToInt64(x int64) Value {
	v.kind = ^int8(Int64)
	v.ptr = nil
	v.u64 = uint64(x)
	return v
}

func (v Value) convertToInt96(x deprecated.Int96) Value {
	i96 := makeValueInt96(x)
	v.kind = i96.kind
	v.ptr = i96.ptr
	v.u64 = i96.u64
	return v
}

func (v Value) convertToFloat(x float32) Value {
	v.kind = ^int8(Float)
	v.ptr = nil
	v.u64 = uint64(math.Float32bits(x))
	return v
}

func (v Value) convertToDouble(x float64) Value {
	v.kind = ^int8(Double)
	v.ptr = nil
	v.u64 = math.Float64bits(x)
	return v
}

func (v Value) convertToByteArray(x []byte) Value {
	v.kind = ^int8(ByteArray)
	v.ptr = unsafecast.AddressOfBytes(x)
	v.u64 = uint64(len(x))
	return v
}

func (v Value) convertToFixedLenByteArray(x []byte) Value {
	v.kind = ^int8(FixedLenByteArray)
	v.ptr = unsafecast.AddressOfBytes(x)
	v.u64 = uint64(len(x))
	return v
}

// Kind returns the kind of v, which represents its parquet physical type.
func (v Value) Kind() Kind { return ^Kind(v.kind) }

// IsNull returns true if v is the null value.
func (v Value) IsNull() bool { return v.isNull() }

// Byte returns v as a byte, which may truncate the underlying byte.
func (v Value) Byte() byte { return v.byte() }

// Boolean returns v as a bool, assuming the underlying type is BOOLEAN.
func (v Value) Boolean() bool { return v.boolean() }

// Int32 returns v as a int32, assuming the underlying type is INT32.
func (v Value) Int32() int32 { return v.int32() }

// Int64 returns v as a int64, assuming the underlying type is INT64.
func (v Value) Int64() int64 { return v.int64() }

// Int96 returns v as a int96, assuming the underlying type is INT96.
//
// The null check guards the pointer dereference done by the internal int96
// accessor; a null value yields the zero Int96.
func (v Value) Int96() deprecated.Int96 {
	var val deprecated.Int96
	if !v.isNull() {
		val = v.int96()
	}
	return val
}

// Float returns v as a float32, assuming the underlying type is FLOAT.
func (v Value) Float() float32 { return v.float() }

// Double returns v as a float64, assuming the underlying type is DOUBLE.
func (v Value) Double() float64 { return v.double() }

// Uint32 returns v as a uint32, assuming the underlying type is INT32.
func (v Value) Uint32() uint32 { return v.uint32() }

// Uint64 returns v as a uint64, assuming the underlying type is INT64.
func (v Value) Uint64() uint64 { return v.uint64() }

// ByteArray returns v as a []byte, assuming the underlying type is either
// BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY.
//
// The application must treat the returned byte slice as a read-only value,
// mutating the content will result in undefined behaviors.
func (v Value) ByteArray() []byte { return v.byteArray() }

// RepetitionLevel returns the repetition level of v.
func (v Value) RepetitionLevel() int { return int(v.repetitionLevel) }

// DefinitionLevel returns the definition level of v.
func (v Value) DefinitionLevel() int { return int(v.definitionLevel) }

// Column returns the column index within the row that v was created from.
//
// Returns -1 if the value does not carry a column index.
func (v Value) Column() int { return v.column() }

// Bytes returns the binary representation of v.
//
// If v is the null value, a nil byte slice is returned.
func (v Value) Bytes() []byte {
	switch v.Kind() {
	case Boolean:
		buf := [8]byte{}
		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
		// A boolean is stored as 0 or 1 in the low byte.
		return buf[0:1]
	case Int32, Float:
		buf := [8]byte{}
		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
		return buf[:4]
	case Int64, Double:
		buf := [8]byte{}
		binary.LittleEndian.PutUint64(buf[:8], v.uint64())
		return buf[:8]
	case ByteArray, FixedLenByteArray, Int96:
		// Variable-length kinds already reference their raw bytes.
		return v.byteArray()
	default:
		return nil
	}
}

// AppendBytes appends the binary representation of v to b.
//
// If v is the null value, b is returned unchanged.
func (v Value) AppendBytes(b []byte) []byte {
	buf := [8]byte{}
	switch v.Kind() {
	case Boolean:
		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
		// Only the low byte carries the boolean.
		return append(b, buf[0])
	case Int32, Float:
		binary.LittleEndian.PutUint32(buf[:4], v.uint32())
		return append(b, buf[:4]...)
	case Int64, Double:
		binary.LittleEndian.PutUint64(buf[:8], v.uint64())
		return append(b, buf[:8]...)
	case ByteArray, FixedLenByteArray, Int96:
		return append(b, v.byteArray()...)
	default:
		return b
	}
}

// Format outputs a human-readable representation of v to w, using r as the
// formatting verb to describe how the value should be printed.
// // The following formatting options are supported: // // %c prints the column index // %+c prints the column index, prefixed with "C:" // %d prints the definition level // %+d prints the definition level, prefixed with "D:" // %r prints the repetition level // %+r prints the repetition level, prefixed with "R:" // %q prints the quoted representation of v // %+q prints the quoted representation of v, prefixed with "V:" // %s prints the string representation of v // %+s prints the string representation of v, prefixed with "V:" // %v same as %s // %+v prints a verbose representation of v // %#v prints a Go value representation of v // // Format satisfies the fmt.Formatter interface. func (v Value) Format(w fmt.State, r rune) { switch r { case 'c': if w.Flag('+') { io.WriteString(w, "C:") } fmt.Fprint(w, v.column()) case 'd': if w.Flag('+') { io.WriteString(w, "D:") } fmt.Fprint(w, v.definitionLevel) case 'r': if w.Flag('+') { io.WriteString(w, "R:") } fmt.Fprint(w, v.repetitionLevel) case 'q': if w.Flag('+') { io.WriteString(w, "V:") } switch v.Kind() { case ByteArray, FixedLenByteArray: fmt.Fprintf(w, "%q", v.byteArray()) default: fmt.Fprintf(w, `"%s"`, v) } case 's': if w.Flag('+') { io.WriteString(w, "V:") } switch v.Kind() { case Boolean: fmt.Fprint(w, v.boolean()) case Int32: fmt.Fprint(w, v.int32()) case Int64: fmt.Fprint(w, v.int64()) case Int96: fmt.Fprint(w, v.int96()) case Float: fmt.Fprint(w, v.float()) case Double: fmt.Fprint(w, v.double()) case ByteArray, FixedLenByteArray: w.Write(v.byteArray()) default: io.WriteString(w, "") } case 'v': switch { case w.Flag('+'): fmt.Fprintf(w, "%+[1]c %+[1]d %+[1]r %+[1]s", v) case w.Flag('#'): v.formatGoString(w) default: v.Format(w, 's') } } } func (v Value) formatGoString(w fmt.State) { io.WriteString(w, "parquet.") switch v.Kind() { case Boolean: fmt.Fprintf(w, "BooleanValue(%t)", v.boolean()) case Int32: fmt.Fprintf(w, "Int32Value(%d)", v.int32()) case Int64: fmt.Fprintf(w, "Int64Value(%d)", v.int64()) case 
Int96: fmt.Fprintf(w, "Int96Value(%#v)", v.int96()) case Float: fmt.Fprintf(w, "FloatValue(%g)", v.float()) case Double: fmt.Fprintf(w, "DoubleValue(%g)", v.double()) case ByteArray: fmt.Fprintf(w, "ByteArrayValue(%q)", v.byteArray()) case FixedLenByteArray: fmt.Fprintf(w, "FixedLenByteArrayValue(%#v)", v.byteArray()) default: io.WriteString(w, "Value{}") return } fmt.Fprintf(w, ".Level(%d,%d,%d)", v.RepetitionLevel(), v.DefinitionLevel(), v.Column(), ) } // String returns a string representation of v. func (v Value) String() string { switch v.Kind() { case Boolean: return strconv.FormatBool(v.boolean()) case Int32: return strconv.FormatInt(int64(v.int32()), 10) case Int64: return strconv.FormatInt(v.int64(), 10) case Int96: return v.Int96().String() case Float: return strconv.FormatFloat(float64(v.float()), 'g', -1, 32) case Double: return strconv.FormatFloat(v.double(), 'g', -1, 32) case ByteArray, FixedLenByteArray: return string(v.byteArray()) default: return "" } } // GoString returns a Go value string representation of v. func (v Value) GoString() string { return fmt.Sprintf("%#v", v) } // Level returns v with the repetition level, definition level, and column index // set to the values passed as arguments. // // The method panics if either argument is negative. func (v Value) Level(repetitionLevel, definitionLevel, columnIndex int) Value { v.repetitionLevel = makeRepetitionLevel(repetitionLevel) v.definitionLevel = makeDefinitionLevel(definitionLevel) v.columnIndex = ^makeColumnIndex(columnIndex) return v } // Clone returns a copy of v which does not share any pointers with it. 
// Clone returns a copy of v which does not share any pointers with it.
func (v Value) Clone() Value {
	switch k := v.Kind(); k {
	case ByteArray, FixedLenByteArray:
		// Only the variable-length kinds carry a pointer; copy their bytes
		// so the clone does not alias the original backing array.
		v.ptr = unsafecast.AddressOfBytes(copyBytes(v.byteArray()))
	}
	return v
}

// makeInt96 decodes a little-endian 12-byte buffer into a deprecated.Int96.
// The caller must guarantee len(bits) >= 12.
func makeInt96(bits []byte) (i96 deprecated.Int96) {
	return deprecated.Int96{
		2: binary.LittleEndian.Uint32(bits[8:12]),
		1: binary.LittleEndian.Uint32(bits[4:8]),
		0: binary.LittleEndian.Uint32(bits[0:4]),
	}
}

// parseValue decodes a single PLAIN-encoded value of the given kind from data.
//
// It returns an error when len(data) does not match the fixed size of the
// kind; BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY accept any non-empty length and
// reference data without copying it.
func parseValue(kind Kind, data []byte) (val Value, err error) {
	switch kind {
	case Boolean:
		if len(data) == 1 {
			val = makeValueBoolean(data[0] != 0)
		}
	case Int32:
		if len(data) == 4 {
			val = makeValueInt32(int32(binary.LittleEndian.Uint32(data)))
		}
	case Int64:
		if len(data) == 8 {
			val = makeValueInt64(int64(binary.LittleEndian.Uint64(data)))
		}
	case Int96:
		if len(data) == 12 {
			val = makeValueInt96(makeInt96(data))
		}
	case Float:
		if len(data) == 4 {
			val = makeValueFloat(float32(math.Float32frombits(binary.LittleEndian.Uint32(data))))
		}
	case Double:
		if len(data) == 8 {
			val = makeValueDouble(float64(math.Float64frombits(binary.LittleEndian.Uint64(data))))
		}
	case ByteArray, FixedLenByteArray:
		val = makeValueBytes(kind, data)
	}
	// val stayed null when the input length did not match the kind.
	if val.isNull() {
		err = fmt.Errorf("cannot decode %s value from input of length %d", kind, len(data))
	}
	return val, err
}

// copyBytes returns a fresh copy of b.
func copyBytes(b []byte) []byte {
	c := make([]byte, len(b))
	copy(c, b)
	return c
}

// Equal returns true if v1 and v2 are equal.
//
// Values are considered equal if they are of the same physical type and hold
// the same Go values. For BYTE_ARRAY and FIXED_LEN_BYTE_ARRAY, the content of
// the underlying byte arrays are tested for equality.
//
// Note that the repetition levels, definition levels, and column indexes are
// not compared by this function, use DeepEqual instead.
func Equal(v1, v2 Value) bool {
	if v1.kind != v2.kind {
		return false
	}
	switch ^Kind(v1.kind) {
	case Boolean:
		return v1.boolean() == v2.boolean()
	case Int32:
		return v1.int32() == v2.int32()
	case Int64:
		return v1.int64() == v2.int64()
	case Int96:
		return v1.int96() == v2.int96()
	case Float:
		return v1.float() == v2.float()
	case Double:
		return v1.double() == v2.double()
	case ByteArray, FixedLenByteArray:
		return bytes.Equal(v1.byteArray(), v2.byteArray())
	case -1: // null
		// ^Kind(0) == -1: both values are null, and null equals null.
		return true
	default:
		return false
	}
}

// DeepEqual returns true if v1 and v2 are equal, including their repetition
// levels, definition levels, and column indexes.
//
// See Equal for details about how value equality is determined.
func DeepEqual(v1, v2 Value) bool {
	return Equal(v1, v2) &&
		v1.repetitionLevel == v2.repetitionLevel &&
		v1.definitionLevel == v2.definitionLevel &&
		v1.columnIndex == v2.columnIndex
}

// Compile-time checks that Value implements the formatting interfaces.
var (
	_ fmt.Formatter = Value{}
	_ fmt.Stringer  = Value{}
)

// clearValues resets every element of values to the null value so the slice
// does not retain references to byte arrays.
func clearValues(values []Value) {
	for i := range values {
		values[i] = Value{}
	}
}

// BooleanReader is an interface implemented by ValueReader instances which
// expose the content of a column of boolean values.
type BooleanReader interface {
	// Read boolean values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadBooleans(values []bool) (int, error)
}

// BooleanWriter is an interface implemented by ValueWriter instances which
// support writing columns of boolean values.
type BooleanWriter interface {
	// Write boolean values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteBooleans(values []bool) (int, error)
}

// Int32Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int32 values.
type Int32Reader interface {
	// Read 32 bits integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt32s(values []int32) (int, error)
}

// Int32Writer is an interface implemented by ValueWriter instances which
// support writing columns of 32 bits signed integer values.
type Int32Writer interface {
	// Write 32 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt32s(values []int32) (int, error)
}

// Int64Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int64 values.
type Int64Reader interface {
	// Read 64 bits integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt64s(values []int64) (int, error)
}

// Int64Writer is an interface implemented by ValueWriter instances which
// support writing columns of 64 bits signed integer values.
type Int64Writer interface {
	// Write 64 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt64s(values []int64) (int, error)
}

// Int96Reader is an interface implemented by ValueReader instances which expose
// the content of a column of int96 values.
type Int96Reader interface {
	// Read 96 bits integer values into the buffer passed as argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadInt96s(values []deprecated.Int96) (int, error)
}

// Int96Writer is an interface implemented by ValueWriter instances which
// support writing columns of 96 bits signed integer values.
type Int96Writer interface {
	// Write 96 bits signed integer values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteInt96s(values []deprecated.Int96) (int, error)
}

// FloatReader is an interface implemented by ValueReader instances which expose
// the content of a column of single-precision floating point values.
type FloatReader interface {
	// Read single-precision floating point values into the buffer passed as
	// argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadFloats(values []float32) (int, error)
}

// FloatWriter is an interface implemented by ValueWriter instances which
// support writing columns of single-precision floating point values.
type FloatWriter interface {
	// Write single-precision floating point values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteFloats(values []float32) (int, error)
}

// DoubleReader is an interface implemented by ValueReader instances which
// expose the content of a column of double-precision float point values.
type DoubleReader interface {
	// Read double-precision floating point values into the buffer passed as
	// argument.
	//
	// The method returns io.EOF when all values have been read.
	ReadDoubles(values []float64) (int, error)
}

// DoubleWriter is an interface implemented by ValueWriter instances which
// support writing columns of double-precision floating point values.
type DoubleWriter interface {
	// Write double-precision floating point values.
	//
	// The method returns the number of values written, and any error that
	// occurred while writing the values.
	WriteDoubles(values []float64) (int, error)
}

// ByteArrayReader is an interface implemented by ValueReader instances which
// expose the content of a column of variable length byte array values.
type ByteArrayReader interface {
	// Read values into the byte buffer passed as argument, returning the number
	// of values written to the buffer (not the number of bytes). Values are
	// written using the PLAIN encoding, each byte array prefixed with its
	// length encoded as a 4 bytes little endian unsigned integer.
	//
	// The method returns io.EOF when all values have been read.
	//
	// If the buffer was not empty, but too small to hold at least one value,
	// io.ErrShortBuffer is returned.
	ReadByteArrays(values []byte) (int, error)
}

// ByteArrayWriter is an interface implemented by ValueWriter instances which
// support writing columns of variable length byte array values.
type ByteArrayWriter interface {
	// Write variable length byte array values.
	//
	// The values passed as input must be laid out using the PLAIN encoding,
	// with each byte array prefixed with the four bytes little endian unsigned
	// integer length.
	//
	// The method returns the number of values written to the underlying column
	// (not the number of bytes), or any error that occurred while attempting to
	// write the values.
	WriteByteArrays(values []byte) (int, error)
}

// FixedLenByteArrayReader is an interface implemented by ValueReader instances
// which expose the content of a column of fixed length byte array values.
type FixedLenByteArrayReader interface {
	// Read values into the byte buffer passed as argument, returning the number
	// of values written to the buffer (not the number of bytes).
	//
	// The method returns io.EOF when all values have been read.
	//
	// If the buffer was not empty, but too small to hold at least one value,
	// io.ErrShortBuffer is returned.
	ReadFixedLenByteArrays(values []byte) (int, error)
}

// FixedLenByteArrayWriter is an interface implemented by ValueWriter instances
// which support writing columns of fixed length byte array values.
type FixedLenByteArrayWriter interface {
	// Writes the fixed length byte array values.
	//
	// The size of the values is assumed to be the same as the expected size of
	// items in the column. The method errors if the length of the input values
	// is not a multiple of the expected item size.
WriteFixedLenByteArrays(values []byte) (int, error) } ================================================ FILE: value_amd64.go ================================================ //go:build !purego package parquet import "golang.org/x/sys/cpu" //go:noescape func memsetValuesAVX2(values []Value, model Value, _ uint64) func memsetValues(values []Value, model Value) { if cpu.X86.HasAVX2 { memsetValuesAVX2(values, model, 0) } else { for i := range values { values[i] = model } } } ================================================ FILE: value_amd64.s ================================================ //go:build !purego #include "textflag.h" #define sizeOfValue 24 // This function is an optimized implementation of the memsetValues function // which assigns the parquet.Value passed as second argument to all elements of // the first slice argument. // // The optimizations relies on the fact that we can pack 4 parquet.Value values // into 3 YMM registers (24 x 4 = 32 x 3 = 96). // // func memsetValuesAVX2(values []Value, model Value, _ uint64) TEXT ·memsetValuesAVX2(SB), NOSPLIT, $0-56 // 48 + padding to load model in YMM MOVQ values_base+0(FP), AX MOVQ values_len+8(FP), BX MOVQ model_ptr+24(FP), R10 MOVQ model_u64+32(FP), R11 MOVQ model+40(FP), R12 // go vet complains about this line but it's OK XORQ SI, SI // byte index MOVQ BX, DI // byte count IMULQ $sizeOfValue, DI CMPQ BX, $4 JB test MOVQ BX, R8 SHRQ $2, R8 SHLQ $2, R8 IMULQ $sizeOfValue, R8 VMOVDQU model+24(FP), Y0 VMOVDQU Y0, Y1 VMOVDQU Y0, Y2 VPERMQ $0b00100100, Y0, Y0 VPERMQ $0b01001001, Y1, Y1 VPERMQ $0b10010010, Y2, Y2 loop4: VMOVDQU Y0, 0(AX)(SI*1) VMOVDQU Y1, 32(AX)(SI*1) VMOVDQU Y2, 64(AX)(SI*1) ADDQ $4*sizeOfValue, SI CMPQ SI, R8 JNE loop4 VZEROUPPER JMP test loop: MOVQ R10, 0(AX)(SI*1) MOVQ R11, 8(AX)(SI*1) MOVQ R12, 16(AX)(SI*1) ADDQ $sizeOfValue, SI test: CMPQ SI, DI JNE loop RET ================================================ FILE: value_go17.go ================================================ //go:build !go1.18 
package parquet import ( "reflect" "unsafe" ) func unsafePointer(v reflect.Value) unsafe.Pointer { // This may not have been a safe conversion but there were no better way // prior to Go 1.18 and the introduction of reflect.Value.UnsafePointer. return unsafe.Pointer(v.Pointer()) } ================================================ FILE: value_go18.go ================================================ //go:build go1.18 package parquet import ( "reflect" "unsafe" ) // This function exists for backward compatibility with the Go 1.17 build which // has a different implementation. // // TODO: remove when we drop support for Go versions prior to 1.18. func unsafePointer(v reflect.Value) unsafe.Pointer { return v.UnsafePointer() } ================================================ FILE: value_test.go ================================================ package parquet_test import ( "bytes" "math" "testing" "time" "unsafe" "github.com/segmentio/parquet-go" "github.com/segmentio/parquet-go/deprecated" ) func TestSizeOfValue(t *testing.T) { t.Logf("sizeof(parquet.Value) = %d", unsafe.Sizeof(parquet.Value{})) } func BenchmarkValueAppend(b *testing.B) { const N = 1024 row := make(parquet.Row, 0, N) val := parquet.ValueOf(42) for i := 0; i < b.N; i++ { row = row[:0] for j := 0; j < N; j++ { row = append(row, val) } } b.SetBytes(N * int64(unsafe.Sizeof(parquet.Value{}))) } func TestValueClone(t *testing.T) { tests := []struct { scenario string values []interface{} }{ { scenario: "BOOLEAN", values: []interface{}{false, true}, }, { scenario: "INT32", values: []interface{}{int32(0), int32(1), int32(math.MinInt32), int32(math.MaxInt32)}, }, { scenario: "INT64", values: []interface{}{int64(0), int64(1), int64(math.MinInt64), int64(math.MaxInt64)}, }, { scenario: "FLOAT", values: []interface{}{float32(0), float32(1), float32(-1)}, }, { scenario: "DOUBLE", values: []interface{}{float64(0), float64(1), float64(-1)}, }, { scenario: "BYTE_ARRAY", values: []interface{}{"", "A", "ABC", "Hello 
World!"}, }, { scenario: "FIXED_LEN_BYTE_ARRAY", values: []interface{}{[1]byte{42}, [16]byte{0: 1}}, }, { scenario: "TIME", values: []interface{}{ time.Date(2020, 1, 2, 3, 4, 5, 7, time.UTC), time.Date(2021, 2, 3, 4, 5, 6, 8, time.UTC), }, }, } for _, test := range tests { t.Run(test.scenario, func(t *testing.T) { for _, value := range test.values { v := parquet.ValueOf(value) c := v.Clone() if !parquet.DeepEqual(v, c) { t.Errorf("cloned values are not equal: want=%#v got=%#v", v, c) } if v.RepetitionLevel() != c.RepetitionLevel() { t.Error("cloned values do not have the same repetition level") } if v.DefinitionLevel() != c.DefinitionLevel() { t.Error("cloned values do not have the same definition level") } if v.Column() != c.Column() { t.Error("cloned values do not have the same column index") } } }) } } func TestZeroValue(t *testing.T) { var v parquet.Value if !v.IsNull() { t.Error("expected zero value parquet.Value to be null") } if v.Byte() != byte(0) { t.Errorf("byte not zero value: got=%#v", v.Byte()) } if v.Boolean() != false { t.Errorf("boolean not zero value: got=%#v", v.Boolean()) } if v.Int32() != 0 { t.Errorf("int32 not zero value: got=%#v", v.Int32()) } if v.Int64() != 0 { t.Errorf("int64 not zero value: got=%#v", v.Int64()) } var zeroInt96 deprecated.Int96 if v.Int96() != zeroInt96 { t.Errorf("int96 not zero value: got=%#v", zeroInt96) } if v.Float() != 0 { t.Errorf("float not zero value: got=%#v", v.Float()) } if v.Double() != 0 { t.Errorf("double not zero value: got=%#v", v.Double()) } if v.Uint32() != 0 { t.Errorf("uint32 not zero value: got=%#v", v.Uint32()) } if v.Uint64() != 0 { t.Errorf("uint64 not zero value: got=%#v", v.Uint64()) } var zeroByte []byte if !bytes.Equal(v.ByteArray(), zeroByte) { t.Errorf("byte array not zero value: got=%#v", v.ByteArray()) } } ================================================ FILE: values_purego.go ================================================ //go:build purego || !amd64 package parquet func 
memsetValues(values []Value, model Value) { for i := range values { values[i] = model } } ================================================ FILE: writer.go ================================================ package parquet import ( "bufio" "bytes" "encoding/binary" "fmt" "hash/crc32" "io" "math/bits" "sort" "github.com/segmentio/encoding/thrift" "github.com/segmentio/parquet-go/compress" "github.com/segmentio/parquet-go/encoding" "github.com/segmentio/parquet-go/encoding/plain" "github.com/segmentio/parquet-go/format" ) // Deprecated: A Writer uses a parquet schema and sequence of Go values to // produce a parquet file to an io.Writer. // // This example showcases a typical use of parquet writers: // // writer := parquet.NewWriter(output) // // for _, row := range rows { // if err := writer.Write(row); err != nil { // ... // } // } // // if err := writer.Close(); err != nil { // ... // } // // The Writer type optimizes for minimal memory usage, each page is written as // soon as it has been filled so only a single page per column needs to be held // in memory and as a result, there are no opportunities to sort rows within an // entire row group. Programs that need to produce parquet files with sorted // row groups should use the Buffer type to buffer and sort the rows prior to // writing them to a Writer. // // For programs building with Go 1.18 or later, the GenericWriter[T] type // supersedes this one. type Writer struct { output io.Writer config *WriterConfig schema *Schema writer *writer rowbuf []Row } // NewWriter constructs a parquet writer writing a file to the given io.Writer. // // The function panics if the writer configuration is invalid. Programs that // cannot guarantee the validity of the options passed to NewWriter should // construct the writer configuration independently prior to calling this // function: // // config, err := parquet.NewWriterConfig(options...) // if err != nil { // // handle the configuration error // ... 
//	} else {
//		// this call to create a writer is guaranteed not to panic
//		writer := parquet.NewWriter(output, config)
//		...
//	}
func NewWriter(output io.Writer, options ...WriterOption) *Writer {
	// Invalid options panic; callers that need graceful handling should build
	// the WriterConfig themselves (see the doc comment above).
	config, err := NewWriterConfig(options...)
	if err != nil {
		panic(err)
	}
	w := &Writer{
		output: output,
		config: config,
	}
	// When the schema is known up front the writer is fully initialized now;
	// otherwise initialization is deferred until the first call to Write,
	// which derives the schema from the row's Go type.
	if config.Schema != nil {
		w.configure(config.Schema)
	}
	return w
}

// configure installs schema on the writer and creates the underlying writer
// state bound to w.output. It is a no-op when schema is nil.
func (w *Writer) configure(schema *Schema) {
	if schema != nil {
		w.config.Schema = schema
		w.schema = schema
		w.writer = newWriter(w.output, w.config)
	}
}

// Close must be called after all values were produced to the writer in order to
// flush all buffers and write the parquet footer.
func (w *Writer) Close() error {
	// w.writer is nil when no schema was ever configured, meaning no rows
	// were written; there is nothing to flush in that case.
	if w.writer != nil {
		return w.writer.close()
	}
	return nil
}

// Flush flushes all buffers into a row group to the underlying io.Writer.
//
// Flush is called automatically on Close, it is only useful to call explicitly
// if the application needs to limit the size of row groups or wants to produce
// multiple row groups per file.
//
// If the writer attempts to create more than MaxRowGroups row groups the method
// returns ErrTooManyRowGroups.
func (w *Writer) Flush() error {
	if w.writer != nil {
		return w.writer.flush()
	}
	return nil
}

// Reset clears the state of the writer without flushing any of the buffers,
// and setting the output to the io.Writer passed as argument, allowing the
// writer to be reused to produce another parquet file.
//
// Reset may be called at any time, including after a writer was closed.
func (w *Writer) Reset(output io.Writer) {
	if w.output = output; w.writer != nil {
		w.writer.reset(w.output)
	}
}

// Write is called to write another row to the parquet file.
//
// The method uses the parquet schema configured on w to traverse the Go value
// and decompose it into a set of columns and values. If no schema were passed
// to NewWriter, it is deduced from the Go type of the row, which then has to
// be a struct or pointer to struct.
func (w *Writer) Write(row interface{}) error {
	// Lazily configure the writer from the row's Go type when no schema was
	// given to NewWriter.
	if w.schema == nil {
		w.configure(SchemaOf(row))
	}
	// Reuse the writer's row buffer, sized to hold a single row, to avoid
	// allocating on every call.
	if cap(w.rowbuf) == 0 {
		w.rowbuf = make([]Row, 1)
	} else {
		w.rowbuf = w.rowbuf[:1]
	}
	defer clearRows(w.rowbuf)
	// Deconstruct reuses the previous row's backing storage via [:0].
	w.rowbuf[0] = w.schema.Deconstruct(w.rowbuf[0][:0], row)
	_, err := w.WriteRows(w.rowbuf)
	return err
}

// WriteRows is called to write rows to the parquet file.
//
// The Writer must have been given a schema when NewWriter was called, otherwise
// the structure of the parquet file cannot be determined from the row only.
//
// The row is expected to contain values for each column of the writer's schema,
// in the order produced by the parquet.(*Schema).Deconstruct method.
func (w *Writer) WriteRows(rows []Row) (int, error) {
	return w.writer.WriteRows(rows)
}

// WriteRowGroup writes a row group to the parquet file.
//
// Buffered rows will be flushed prior to writing rows from the group, unless
// the row group was empty in which case nothing is written to the file.
//
// The content of the row group is flushed to the writer; after the method
// returns successfully, the row group will be empty and in ready to be reused.
func (w *Writer) WriteRowGroup(rowGroup RowGroup) (int64, error) {
	rowGroupSchema := rowGroup.Schema()
	switch {
	case rowGroupSchema == nil:
		return 0, ErrRowGroupSchemaMissing
	case w.schema == nil:
		// The writer adopts the schema of the first row group it receives.
		w.configure(rowGroupSchema)
	case !nodesAreEqual(w.schema, rowGroupSchema):
		return 0, ErrRowGroupSchemaMismatch
	}
	// Flush rows buffered so far into their own row group before copying the
	// input row group, so the two are not mixed.
	if err := w.writer.flush(); err != nil {
		return 0, err
	}
	// Sizing the bloom filters from the source column chunks lets the copy
	// below feed pages directly into the filters.
	w.writer.configureBloomFilters(rowGroup.ColumnChunks())
	rows := rowGroup.Rows()
	defer rows.Close()
	n, err := CopyRows(w.writer, rows)
	if err != nil {
		return n, err
	}
	return w.writer.writeRowGroup(rowGroup.Schema(), rowGroup.SortingColumns())
}

// ReadRowsFrom reads rows from the reader passed as arguments and writes them
// to w.
//
// This is similar to calling WriteRow repeatedly, but will be more efficient
// if optimizations are supported by the reader.
func (w *Writer) ReadRowsFrom(rows RowReader) (written int64, err error) { if w.schema == nil { if r, ok := rows.(RowReaderWithSchema); ok { w.configure(r.Schema()) } } if cap(w.rowbuf) < defaultRowBufferSize { w.rowbuf = make([]Row, defaultRowBufferSize) } else { w.rowbuf = w.rowbuf[:cap(w.rowbuf)] } return copyRows(w.writer, rows, w.rowbuf) } // Schema returns the schema of rows written by w. // // The returned value will be nil if no schema has yet been configured on w. func (w *Writer) Schema() *Schema { return w.schema } // SetKeyValueMetadata sets a key/value pair in the Parquet file metadata. // // Keys are assumed to be unique, if the same key is repeated multiple times the // last value is retained. While the parquet format does not require unique keys, // this design decision was made to optimize for the most common use case where // applications leverage this extension mechanism to associate single values to // keys. This may create incompatibilities with other parquet libraries, or may // cause some key/value pairs to be lost when open parquet files written with // repeated keys. We can revisit this decision if it ever becomes a blocker. 
func (w *Writer) SetKeyValueMetadata(key, value string) { for i, kv := range w.writer.metadata { if kv.Key == key { kv.Value = value w.writer.metadata[i] = kv return } } w.writer.metadata = append(w.writer.metadata, format.KeyValue{ Key: key, Value: value, }) } type writer struct { buffer *bufio.Writer writer offsetTrackingWriter values [][]Value numRows int64 maxRows int64 createdBy string metadata []format.KeyValue columns []*writerColumn columnChunk []format.ColumnChunk columnIndex []format.ColumnIndex offsetIndex []format.OffsetIndex columnOrders []format.ColumnOrder schemaElements []format.SchemaElement rowGroups []format.RowGroup columnIndexes [][]format.ColumnIndex offsetIndexes [][]format.OffsetIndex sortingColumns []format.SortingColumn } func newWriter(output io.Writer, config *WriterConfig) *writer { w := new(writer) if config.WriteBufferSize <= 0 { w.writer.Reset(output) } else { w.buffer = bufio.NewWriterSize(output, config.WriteBufferSize) w.writer.Reset(w.buffer) } w.maxRows = config.MaxRowsPerRowGroup w.createdBy = config.CreatedBy w.metadata = make([]format.KeyValue, 0, len(config.KeyValueMetadata)) for k, v := range config.KeyValueMetadata { w.metadata = append(w.metadata, format.KeyValue{Key: k, Value: v}) } sortKeyValueMetadata(w.metadata) w.sortingColumns = make([]format.SortingColumn, len(config.Sorting.SortingColumns)) config.Schema.forEachNode(func(name string, node Node) { nodeType := node.Type() repetitionType := (*format.FieldRepetitionType)(nil) if node != config.Schema { // the root has no repetition type repetitionType = fieldRepetitionTypePtrOf(node) } // For backward compatibility with older readers, the parquet specification // recommends to set the scale and precision on schema elements when the // column is of logical type decimal. 
logicalType := nodeType.LogicalType() scale, precision := (*int32)(nil), (*int32)(nil) if logicalType != nil && logicalType.Decimal != nil { scale = &logicalType.Decimal.Scale precision = &logicalType.Decimal.Precision } typeLength := (*int32)(nil) if n := int32(nodeType.Length()); n > 0 { typeLength = &n } w.schemaElements = append(w.schemaElements, format.SchemaElement{ Type: nodeType.PhysicalType(), TypeLength: typeLength, RepetitionType: repetitionType, Name: name, NumChildren: int32(len(node.Fields())), ConvertedType: nodeType.ConvertedType(), Scale: scale, Precision: precision, LogicalType: logicalType, }) }) dataPageType := format.DataPage if config.DataPageVersion == 2 { dataPageType = format.DataPageV2 } defaultCompression := config.Compression if defaultCompression == nil { defaultCompression = &Uncompressed } // Those buffers are scratch space used to generate the page header and // content, they are shared by all column chunks because they are only // used during calls to writeDictionaryPage or writeDataPage, which are // not done concurrently. 
buffers := new(writerBuffers) forEachLeafColumnOf(config.Schema, func(leaf leafColumn) { encoding := encodingOf(leaf.node) dictionary := Dictionary(nil) columnType := leaf.node.Type() columnIndex := int(leaf.columnIndex) compression := leaf.node.Compression() if compression == nil { compression = defaultCompression } if isDictionaryEncoding(encoding) { dictBuffer := columnType.NewValues( make([]byte, 0, defaultDictBufferSize), nil, ) dictionary = columnType.NewDictionary(columnIndex, 0, dictBuffer) columnType = dictionary.Type() } c := &writerColumn{ buffers: buffers, pool: config.ColumnPageBuffers, columnPath: leaf.path, columnType: columnType, columnIndex: columnType.NewColumnIndexer(config.ColumnIndexSizeLimit), columnFilter: searchBloomFilterColumn(config.BloomFilters, leaf.path), compression: compression, dictionary: dictionary, dataPageType: dataPageType, maxRepetitionLevel: leaf.maxRepetitionLevel, maxDefinitionLevel: leaf.maxDefinitionLevel, bufferIndex: int32(leaf.columnIndex), bufferSize: int32(float64(config.PageBufferSize) * 0.98), writePageStats: config.DataPageStatistics, encodings: make([]format.Encoding, 0, 3), // Data pages in version 2 can omit compression when dictionary // encoding is employed; only the dictionary page needs to be // compressed, the data pages are encoded with the hybrid // RLE/Bit-Pack encoding which doesn't benefit from an extra // compression layer. 
isCompressed: isCompressed(compression) && (dataPageType != format.DataPageV2 || dictionary == nil), } c.header.encoder.Reset(c.header.protocol.NewWriter(&buffers.header)) if leaf.maxDefinitionLevel > 0 { c.encodings = addEncoding(c.encodings, format.RLE) } if isDictionaryEncoding(encoding) { c.encodings = addEncoding(c.encodings, format.Plain) } c.encoding = encoding c.encodings = addEncoding(c.encodings, c.encoding.Encoding()) sortPageEncodings(c.encodings) w.columns = append(w.columns, c) if sortingIndex := searchSortingColumn(config.Sorting.SortingColumns, leaf.path); sortingIndex < len(w.sortingColumns) { w.sortingColumns[sortingIndex] = format.SortingColumn{ ColumnIdx: int32(leaf.columnIndex), Descending: config.Sorting.SortingColumns[sortingIndex].Descending(), NullsFirst: config.Sorting.SortingColumns[sortingIndex].NullsFirst(), } } }) // Pre-allocate the backing array so that in most cases where the rows // contain a single value we will hit collocated memory areas when writing // rows to the writer. This won't benefit repeated columns much but in that // case we would just waste a bit of memory which we can afford. 
values := make([]Value, len(w.columns)) w.values = make([][]Value, len(w.columns)) for i := range values { w.values[i] = values[i : i : i+1] } w.columnChunk = make([]format.ColumnChunk, len(w.columns)) w.columnIndex = make([]format.ColumnIndex, len(w.columns)) w.offsetIndex = make([]format.OffsetIndex, len(w.columns)) w.columnOrders = make([]format.ColumnOrder, len(w.columns)) for i, c := range w.columns { w.columnChunk[i] = format.ColumnChunk{ MetaData: format.ColumnMetaData{ Type: format.Type(c.columnType.Kind()), Encoding: c.encodings, PathInSchema: c.columnPath, Codec: c.compression.CompressionCodec(), KeyValueMetadata: nil, // TODO }, } } for i, c := range w.columns { c.columnChunk = &w.columnChunk[i] c.offsetIndex = &w.offsetIndex[i] } for i, c := range w.columns { w.columnOrders[i] = *c.columnType.ColumnOrder() } return w } func (w *writer) reset(writer io.Writer) { if w.buffer == nil { w.writer.Reset(writer) } else { w.buffer.Reset(writer) w.writer.Reset(w.buffer) } for _, c := range w.columns { c.reset() } for i := range w.rowGroups { w.rowGroups[i] = format.RowGroup{} } for i := range w.columnIndexes { w.columnIndexes[i] = nil } for i := range w.offsetIndexes { w.offsetIndexes[i] = nil } w.rowGroups = w.rowGroups[:0] w.columnIndexes = w.columnIndexes[:0] w.offsetIndexes = w.offsetIndexes[:0] } func (w *writer) close() error { if err := w.writeFileHeader(); err != nil { return err } if err := w.flush(); err != nil { return err } if err := w.writeFileFooter(); err != nil { return err } if w.buffer != nil { return w.buffer.Flush() } return nil } func (w *writer) flush() error { _, err := w.writeRowGroup(nil, nil) return err } func (w *writer) writeFileHeader() error { if w.writer.writer == nil { return io.ErrClosedPipe } if w.writer.offset == 0 { _, err := w.writer.WriteString("PAR1") return err } return nil } func (w *writer) configureBloomFilters(columnChunks []ColumnChunk) { for i, c := range w.columns { if c.columnFilter != nil { 
c.resizeBloomFilter(columnChunks[i].NumValues()) } } } func (w *writer) writeFileFooter() error { // The page index is composed of two sections: column and offset indexes. // They are written after the row groups, right before the footer (which // is written by the parent Writer.Close call). // // This section both writes the page index and generates the values of // ColumnIndexOffset, ColumnIndexLength, OffsetIndexOffset, and // OffsetIndexLength in the corresponding columns of the file metadata. // // Note: the page index is always written, even if we created data pages v1 // because the parquet format is backward compatible in this case. Older // readers will simply ignore this section since they do not know how to // decode its content, nor have loaded any metadata to reference it. protocol := new(thrift.CompactProtocol) encoder := thrift.NewEncoder(protocol.NewWriter(&w.writer)) for i, columnIndexes := range w.columnIndexes { rowGroup := &w.rowGroups[i] for j := range columnIndexes { column := &rowGroup.Columns[j] column.ColumnIndexOffset = w.writer.offset if err := encoder.Encode(&columnIndexes[j]); err != nil { return err } column.ColumnIndexLength = int32(w.writer.offset - column.ColumnIndexOffset) } } for i, offsetIndexes := range w.offsetIndexes { rowGroup := &w.rowGroups[i] for j := range offsetIndexes { column := &rowGroup.Columns[j] column.OffsetIndexOffset = w.writer.offset if err := encoder.Encode(&offsetIndexes[j]); err != nil { return err } column.OffsetIndexLength = int32(w.writer.offset - column.OffsetIndexOffset) } } numRows := int64(0) for rowGroupIndex := range w.rowGroups { numRows += w.rowGroups[rowGroupIndex].NumRows } footer, err := thrift.Marshal(new(thrift.CompactProtocol), &format.FileMetaData{ Version: 1, Schema: w.schemaElements, NumRows: numRows, RowGroups: w.rowGroups, KeyValueMetadata: w.metadata, CreatedBy: w.createdBy, ColumnOrders: w.columnOrders, }) if err != nil { return err } length := len(footer) footer = append(footer, 0, 
0, 0, 0) footer = append(footer, "PAR1"...) binary.LittleEndian.PutUint32(footer[length:], uint32(length)) _, err = w.writer.Write(footer) return err } func (w *writer) writeRowGroup(rowGroupSchema *Schema, rowGroupSortingColumns []SortingColumn) (int64, error) { numRows := w.columns[0].totalRowCount() if numRows == 0 { return 0, nil } if len(w.rowGroups) == MaxRowGroups { return 0, ErrTooManyRowGroups } defer func() { w.numRows = 0 for _, c := range w.columns { c.reset() } for i := range w.columnIndex { w.columnIndex[i] = format.ColumnIndex{} } }() for _, c := range w.columns { if err := c.flush(); err != nil { return 0, err } if err := c.flushFilterPages(); err != nil { return 0, err } } if err := w.writeFileHeader(); err != nil { return 0, err } fileOffset := w.writer.offset for _, c := range w.columns { if len(c.filter) > 0 { c.columnChunk.MetaData.BloomFilterOffset = w.writer.offset if err := c.writeBloomFilter(&w.writer); err != nil { return 0, err } } } for i, c := range w.columns { w.columnIndex[i] = format.ColumnIndex(c.columnIndex.ColumnIndex()) if c.dictionary != nil { c.columnChunk.MetaData.DictionaryPageOffset = w.writer.offset if err := c.writeDictionaryPage(&w.writer, c.dictionary); err != nil { return 0, fmt.Errorf("writing dictionary page of row group colum %d: %w", i, err) } } dataPageOffset := w.writer.offset c.columnChunk.MetaData.DataPageOffset = dataPageOffset for j := range c.offsetIndex.PageLocations { c.offsetIndex.PageLocations[j].Offset += dataPageOffset } for _, page := range c.pages { if _, err := io.Copy(&w.writer, page); err != nil { return 0, fmt.Errorf("writing buffered pages of row group column %d: %w", i, err) } } } totalByteSize := int64(0) totalCompressedSize := int64(0) for i := range w.columnChunk { c := &w.columnChunk[i].MetaData sortPageEncodingStats(c.EncodingStats) totalByteSize += int64(c.TotalUncompressedSize) totalCompressedSize += int64(c.TotalCompressedSize) } sortingColumns := w.sortingColumns if len(sortingColumns) 
== 0 && len(rowGroupSortingColumns) > 0 { sortingColumns = make([]format.SortingColumn, 0, len(rowGroupSortingColumns)) forEachLeafColumnOf(rowGroupSchema, func(leaf leafColumn) { if sortingIndex := searchSortingColumn(rowGroupSortingColumns, leaf.path); sortingIndex < len(sortingColumns) { sortingColumns[sortingIndex] = format.SortingColumn{ ColumnIdx: int32(leaf.columnIndex), Descending: rowGroupSortingColumns[sortingIndex].Descending(), NullsFirst: rowGroupSortingColumns[sortingIndex].NullsFirst(), } } }) } columns := make([]format.ColumnChunk, len(w.columnChunk)) copy(columns, w.columnChunk) columnIndex := make([]format.ColumnIndex, len(w.columnIndex)) copy(columnIndex, w.columnIndex) offsetIndex := make([]format.OffsetIndex, len(w.offsetIndex)) copy(offsetIndex, w.offsetIndex) for i := range columns { c := &columns[i] c.MetaData.EncodingStats = make([]format.PageEncodingStats, len(c.MetaData.EncodingStats)) copy(c.MetaData.EncodingStats, w.columnChunk[i].MetaData.EncodingStats) } for i := range offsetIndex { c := &offsetIndex[i] c.PageLocations = make([]format.PageLocation, len(c.PageLocations)) copy(c.PageLocations, w.offsetIndex[i].PageLocations) } w.rowGroups = append(w.rowGroups, format.RowGroup{ Columns: columns, TotalByteSize: totalByteSize, NumRows: numRows, SortingColumns: sortingColumns, FileOffset: fileOffset, TotalCompressedSize: totalCompressedSize, Ordinal: int16(len(w.rowGroups)), }) w.columnIndexes = append(w.columnIndexes, columnIndex) w.offsetIndexes = append(w.offsetIndexes, offsetIndex) return numRows, nil } func (w *writer) WriteRows(rows []Row) (int, error) { return w.writeRows(len(rows), func(start, end int) (int, error) { defer func() { for i, values := range w.values { clearValues(values) w.values[i] = values[:0] } }() // TODO: if an error occurs in this method the writer may be left in an // partially functional state. 
Applications are not expected to continue // using the writer after getting an error, but maybe we could ensure that // we are preventing further use as well? for _, row := range rows[start:end] { row.Range(func(columnIndex int, columnValues []Value) bool { w.values[columnIndex] = append(w.values[columnIndex], columnValues...) return true }) } for i, values := range w.values { if len(values) > 0 { if err := w.columns[i].writeRows(values); err != nil { return 0, err } } } return end - start, nil }) } func (w *writer) writeRows(numRows int, write func(i, j int) (int, error)) (int, error) { written := 0 for written < numRows { remain := w.maxRows - w.numRows length := numRows - written if remain == 0 { remain = w.maxRows if err := w.flush(); err != nil { return written, err } } if remain < int64(length) { length = int(remain) } // Since the writer cannot flush pages across row boundaries, calls to // WriteRows with very large slices can result in greatly exceeding the // target page size. To set a limit to the impact of these large writes // we chunk the input in slices of 64 rows. // // Note that this mechanism isn't perfect; for example, values may hold // large byte slices which could still cause the column buffers to grow // beyond the target page size. const maxRowsPerWrite = 64 if length > maxRowsPerWrite { length = maxRowsPerWrite } n, err := write(written, written+length) written += n w.numRows += int64(n) if err != nil { return written, err } } return written, nil } // The WriteValues method is intended to work in pair with WritePage to allow // programs to target writing values to specific columns of of the writer. 
func (w *writer) WriteValues(values []Value) (numValues int, err error) {
	// All values are routed to the column identified by the first value; the
	// caller is expected to pass values belonging to a single column.
	return w.columns[values[0].Column()].WriteValues(values)
}

// One writerBuffers is used by each writer instance, the memory buffers here
// are shared by all columns of the writer because serialization is not done
// concurrently, which helps keep memory utilization low, both in the total
// footprint and GC cost.
//
// The type also exposes helper methods to facilitate the generation of parquet
// pages. A scratch space is used when serialization requires combining multiple
// buffers or compressing the page data, with double-buffering technique being
// employed by swapping the scratch and page buffers to minimize memory copies.
type writerBuffers struct {
	header      bytes.Buffer // buffer where page headers are encoded
	repetitions []byte       // buffer used to encode repetition levels
	definitions []byte       // buffer used to encode definition levels
	page        []byte       // page buffer holding the page data
	scratch     []byte       // scratch space used for compression
}

// crc32 computes the checksum of the page sections in the order they appear
// in the serialized page: repetition levels, definition levels, page data.
func (wb *writerBuffers) crc32() (checksum uint32) {
	checksum = crc32.Update(checksum, crc32.IEEETable, wb.repetitions)
	checksum = crc32.Update(checksum, crc32.IEEETable, wb.definitions)
	checksum = crc32.Update(checksum, crc32.IEEETable, wb.page)
	return checksum
}

// size returns the total byte size of the serialized page sections.
func (wb *writerBuffers) size() int {
	return len(wb.repetitions) + len(wb.definitions) + len(wb.page)
}

// reset truncates all page buffers, retaining their backing storage for reuse.
func (wb *writerBuffers) reset() {
	wb.repetitions = wb.repetitions[:0]
	wb.definitions = wb.definitions[:0]
	wb.page = wb.page[:0]
}

// encodeLevels RLE-encodes src into dst using the bit width required to
// represent maxLevel.
func encodeLevels(dst, src []byte, maxLevel byte) ([]byte, error) {
	bitWidth := bits.Len8(maxLevel)
	return levelEncodingsRLE[bitWidth-1].EncodeLevels(dst, src)
}

func (wb *writerBuffers) encodeRepetitionLevels(page Page, maxRepetitionLevel byte) (err error) {
	wb.repetitions, err = encodeLevels(wb.repetitions, page.RepetitionLevels(), maxRepetitionLevel)
	return
}

func (wb *writerBuffers) encodeDefinitionLevels(page Page, maxDefinitionLevel byte) (err error) {
	wb.definitions, err = encodeLevels(wb.definitions, page.DefinitionLevels(), maxDefinitionLevel)
	return
}

// prependLevelsToDataPageV1 moves the encoded repetition and definition level
// sections into the page buffer, each prefixed with its 4-byte little-endian
// length, as required by the data page v1 layout.
func (wb *writerBuffers) prependLevelsToDataPageV1(maxRepetitionLevel, maxDefinitionLevel byte) {
	hasRepetitionLevels := maxRepetitionLevel > 0
	hasDefinitionLevels := maxDefinitionLevel > 0

	if hasRepetitionLevels || hasDefinitionLevels {
		wb.scratch = wb.scratch[:0]
		// In data pages v1, the repetition and definition levels are prefixed
		// with the 4 bytes length of the sections. While the parquet-format
		// documentation indicates that the length prefix is part of the hybrid
		// RLE/Bit-Pack encoding, this is the only condition where it is used
		// so we treat it as a special case rather than implementing it in the
		// encoding.
		//
		// Reference https://github.com/apache/parquet-format/blob/master/Encodings.md#run-length-encoding--bit-packing-hybrid-rle--3
		if hasRepetitionLevels {
			wb.scratch = plain.AppendInt32(wb.scratch, int32(len(wb.repetitions)))
			wb.scratch = append(wb.scratch, wb.repetitions...)
			wb.repetitions = wb.repetitions[:0]
		}
		if hasDefinitionLevels {
			wb.scratch = plain.AppendInt32(wb.scratch, int32(len(wb.definitions)))
			wb.scratch = append(wb.scratch, wb.definitions...)
			wb.definitions = wb.definitions[:0]
		}
		wb.scratch = append(wb.scratch, wb.page...)
wb.swapPageAndScratchBuffers() } } func (wb *writerBuffers) encode(page Page, enc encoding.Encoding) (err error) { pageType := page.Type() pageData := page.Data() wb.page, err = pageType.Encode(wb.page[:0], pageData, enc) return err } func (wb *writerBuffers) compress(codec compress.Codec) (err error) { wb.scratch, err = codec.Encode(wb.scratch[:0], wb.page) wb.swapPageAndScratchBuffers() return err } func (wb *writerBuffers) swapPageAndScratchBuffers() { wb.page, wb.scratch = wb.scratch, wb.page[:0] } type writerColumn struct { pool BufferPool pages []io.ReadWriteSeeker columnPath columnPath columnType Type columnIndex ColumnIndexer columnBuffer ColumnBuffer columnFilter BloomFilterColumn encoding encoding.Encoding compression compress.Codec dictionary Dictionary dataPageType format.PageType maxRepetitionLevel byte maxDefinitionLevel byte buffers *writerBuffers header struct { protocol thrift.CompactProtocol encoder thrift.Encoder } filter []byte numRows int64 bufferIndex int32 bufferSize int32 writePageStats bool isCompressed bool encodings []format.Encoding columnChunk *format.ColumnChunk offsetIndex *format.OffsetIndex } func (c *writerColumn) reset() { if c.columnBuffer != nil { c.columnBuffer.Reset() } if c.columnIndex != nil { c.columnIndex.Reset() } if c.dictionary != nil { c.dictionary.Reset() } for _, page := range c.pages { c.pool.PutBuffer(page) } for i := range c.pages { c.pages[i] = nil } c.pages = c.pages[:0] // Bloom filters may change in size between row groups, but we retain the // buffer to avoid reallocating large memory blocks. c.filter = c.filter[:0] c.numRows = 0 // Reset the fields of column chunks that change between row groups, // but keep the ones that remain unchanged. 
c.columnChunk.MetaData.NumValues = 0 c.columnChunk.MetaData.TotalUncompressedSize = 0 c.columnChunk.MetaData.TotalCompressedSize = 0 c.columnChunk.MetaData.DataPageOffset = 0 c.columnChunk.MetaData.DictionaryPageOffset = 0 c.columnChunk.MetaData.Statistics = format.Statistics{} c.columnChunk.MetaData.EncodingStats = c.columnChunk.MetaData.EncodingStats[:0] c.columnChunk.MetaData.BloomFilterOffset = 0 c.offsetIndex.PageLocations = c.offsetIndex.PageLocations[:0] } func (c *writerColumn) totalRowCount() int64 { n := c.numRows if c.columnBuffer != nil { n += int64(c.columnBuffer.Len()) } return n } func (c *writerColumn) flush() (err error) { if c.columnBuffer.Len() > 0 { defer c.columnBuffer.Reset() _, err = c.writeDataPage(c.columnBuffer.Page()) } return err } func (c *writerColumn) flushFilterPages() error { if c.columnFilter == nil { return nil } // If there is a dictionary, it contains all the values that we need to // write to the filter. if dict := c.dictionary; dict != nil { // Need to always attempt to resize the filter, as the writer might // be reused after resetting which would have reset the length of // the filter to 0. c.resizeBloomFilter(int64(dict.Len())) return c.writePageToFilter(dict.Page()) } // When the filter was already allocated, pages have been written to it as // they were seen by the column writer. if len(c.filter) > 0 { return nil } // When the filter was not allocated, the writer did not know how many // values were going to be seen and therefore could not properly size the // filter ahead of time. In this case, we read back all the pages that we // have encoded and copy their values back to the filter. // // A prior implementation of the column writer used to create in-memory // copies of the pages to avoid this decoding step; however, this unbounded // allocation caused memory exhaustion in production applications. 
CPU being // a somewhat more stretchable resource, we prefer spending time on this // decoding step than having to trigger incident response when production // systems are getting OOM-Killed. c.resizeBloomFilter(c.columnChunk.MetaData.NumValues) column := &Column{ // Set all the fields required by the decodeDataPage* methods. typ: c.columnType, encoding: c.encoding, compression: c.compression, maxRepetitionLevel: c.maxRepetitionLevel, maxDefinitionLevel: c.maxDefinitionLevel, index: int16(c.bufferIndex), } rbuf, pool := getBufioReader(nil, 1024) pbuf := (*buffer)(nil) defer func() { putBufioReader(rbuf, pool) if pbuf != nil { pbuf.unref() } }() decoder := thrift.NewDecoder(c.header.protocol.NewReader(rbuf)) for _, p := range c.pages { rbuf.Reset(p) header := new(format.PageHeader) if err := decoder.Decode(header); err != nil { return err } if pbuf != nil { pbuf.unref() } pbuf = buffers.get(int(header.CompressedPageSize)) if _, err := io.ReadFull(rbuf, pbuf.data); err != nil { return err } if _, err := p.Seek(0, io.SeekStart); err != nil { return err } var page Page var err error switch header.Type { case format.DataPage: page, err = column.decodeDataPageV1(DataPageHeaderV1{header.DataPageHeader}, pbuf, nil, header.UncompressedPageSize) case format.DataPageV2: page, err = column.decodeDataPageV2(DataPageHeaderV2{header.DataPageHeaderV2}, pbuf, nil, header.UncompressedPageSize) } if page != nil { err = c.writePageToFilter(page) Release(page) } if err != nil { return err } } return nil } func (c *writerColumn) resizeBloomFilter(numValues int64) { filterSize := c.columnFilter.Size(numValues) if cap(c.filter) < filterSize { c.filter = make([]byte, filterSize) } else { c.filter = c.filter[:filterSize] for i := range c.filter { c.filter[i] = 0 } } } func (c *writerColumn) newColumnBuffer() ColumnBuffer { column := c.columnType.NewColumnBuffer(int(c.bufferIndex), c.columnType.EstimateNumValues(int(c.bufferSize))) switch { case c.maxRepetitionLevel > 0: column = 
newRepeatedColumnBuffer(column, c.maxRepetitionLevel, c.maxDefinitionLevel, nullsGoLast)
	case c.maxDefinitionLevel > 0:
		column = newOptionalColumnBuffer(column, c.maxDefinitionLevel, nullsGoLast)
	}
	return column
}

// writeRows appends values to the column buffer, flushing a data page when the
// buffer reaches the configured page size.
func (c *writerColumn) writeRows(rows []Value) error {
	if c.columnBuffer == nil {
		// Lazily create the row group column so we don't need to allocate it if
		// rows are not written individually to the column.
		c.columnBuffer = c.newColumnBuffer()
	}
	if _, err := c.columnBuffer.WriteValues(rows); err != nil {
		return err
	}
	if c.columnBuffer.Size() >= int64(c.bufferSize) {
		return c.flush()
	}
	return nil
}

// WriteValues appends values to the column buffer without triggering a flush;
// it pairs with WritePage for callers that target specific columns.
func (c *writerColumn) WriteValues(values []Value) (numValues int, err error) {
	if c.columnBuffer == nil {
		c.columnBuffer = c.newColumnBuffer()
	}
	return c.columnBuffer.WriteValues(values)
}

// writeBloomFilter serializes the bloom filter header followed by the filter
// bits to w.
func (c *writerColumn) writeBloomFilter(w io.Writer) error {
	e := thrift.NewEncoder(c.header.protocol.NewWriter(w))
	h := bloomFilterHeader(c.columnFilter)
	h.NumBytes = int32(len(c.filter))
	if err := e.Encode(&h); err != nil {
		return err
	}
	_, err := w.Write(c.filter)
	return err
}

// writeDataPage encodes, optionally compresses, and buffers a data page built
// from page, returning the number of values written.
func (c *writerColumn) writeDataPage(page Page) (int64, error) {
	numValues := page.NumValues()
	if numValues == 0 {
		return 0, nil
	}

	buf := c.buffers
	buf.reset()

	// NOTE(review): the error returns of the level encoders are intentionally
	// not checked here, matching the original code — confirm RLE level
	// encoding cannot fail for valid level data.
	if c.maxRepetitionLevel > 0 {
		buf.encodeRepetitionLevels(page, c.maxRepetitionLevel)
	}
	if c.maxDefinitionLevel > 0 {
		buf.encodeDefinitionLevels(page, c.maxDefinitionLevel)
	}

	if err := buf.encode(page, c.encoding); err != nil {
		return 0, fmt.Errorf("encoding parquet data page: %w", err)
	}
	if c.dataPageType == format.DataPage {
		// Bug fix: the first argument must be the repetition level, not the
		// definition level; passing the definition level twice caused a
		// spurious empty repetition-levels section (4-byte zero length) to be
		// prepended to v1 data pages of optional, non-repeated columns.
		buf.prependLevelsToDataPageV1(c.maxRepetitionLevel, c.maxDefinitionLevel)
	}

	uncompressedPageSize := buf.size()
	if c.isCompressed {
		if err := buf.compress(c.compression); err != nil {
			return 0, fmt.Errorf("compressing parquet data page: %w", err)
		}
	}

	if page.Dictionary() == nil && len(c.filter) > 0 {
		// When the writer knows the number of values in advance (e.g. when
		// writing a full row group), the filter encoding is set and the page
		// can be directly applied to the filter, which minimizes memory usage
		// since there is no need to buffer the values in order to determine
		// the size of the filter.
		if err := c.writePageToFilter(page); err != nil {
			return 0, err
		}
	}

	statistics := format.Statistics{}
	if c.writePageStats {
		statistics = c.makePageStatistics(page)
	}

	pageHeader := &format.PageHeader{
		Type:                 c.dataPageType,
		UncompressedPageSize: int32(uncompressedPageSize),
		CompressedPageSize:   int32(buf.size()),
		CRC:                  int32(buf.crc32()),
	}

	numRows := page.NumRows()
	numNulls := page.NumNulls()
	switch c.dataPageType {
	case format.DataPage:
		pageHeader.DataPageHeader = &format.DataPageHeader{
			NumValues:               int32(numValues),
			Encoding:                c.encoding.Encoding(),
			DefinitionLevelEncoding: format.RLE,
			RepetitionLevelEncoding: format.RLE,
			Statistics:              statistics,
		}
	case format.DataPageV2:
		pageHeader.DataPageHeaderV2 = &format.DataPageHeaderV2{
			NumValues:                  int32(numValues),
			NumNulls:                   int32(numNulls),
			NumRows:                    int32(numRows),
			Encoding:                   c.encoding.Encoding(),
			DefinitionLevelsByteLength: int32(len(buf.definitions)),
			RepetitionLevelsByteLength: int32(len(buf.repetitions)),
			IsCompressed:               &c.isCompressed,
			Statistics:                 statistics,
		}
	}

	buf.header.Reset()
	if err := c.header.encoder.Encode(pageHeader); err != nil {
		return 0, err
	}

	size := int64(buf.header.Len()) +
		int64(len(buf.repetitions)) +
		int64(len(buf.definitions)) +
		int64(len(buf.page))

	err := c.writePageTo(size, func(output io.Writer) (written int64, err error) {
		// The page sections are written in the order mandated by the format:
		// header, repetition levels, definition levels, page data.
		for _, data := range [...][]byte{
			buf.header.Bytes(),
			buf.repetitions,
			buf.definitions,
			buf.page,
		} {
			wn, err := output.Write(data)
			written += int64(wn)
			if err != nil {
				return written, err
			}
		}
		return written, nil
	})
	if err != nil {
		return 0, err
	}

	c.recordPageStats(int32(buf.header.Len()), pageHeader, page)
	return numValues, nil
}

func (c *writerColumn) writeDictionaryPage(output io.Writer, dict Dictionary) (err error) {
	buf := c.buffers
buf.reset()
	// Dictionary pages are always PLAIN-encoded per the parquet format.
	if err := buf.encode(dict.Page(), &Plain); err != nil {
		return fmt.Errorf("writing parquet dictionary page: %w", err)
	}

	// Capture the size before compression mutates the buffer.
	uncompressedPageSize := buf.size()
	if isCompressed(c.compression) {
		if err := buf.compress(c.compression); err != nil {
			// Fixed typo in the error message: "copmressing" -> "compressing".
			return fmt.Errorf("compressing parquet dictionary page: %w", err)
		}
	}

	pageHeader := &format.PageHeader{
		Type:                 format.DictionaryPage,
		UncompressedPageSize: int32(uncompressedPageSize),
		CompressedPageSize:   int32(buf.size()),
		CRC:                  int32(buf.crc32()),
		DictionaryPageHeader: &format.DictionaryPageHeader{
			NumValues: int32(dict.Len()),
			Encoding:  format.Plain,
			IsSorted:  false,
		},
	}

	header := &c.buffers.header
	header.Reset()
	if err := c.header.encoder.Encode(pageHeader); err != nil {
		return err
	}
	if _, err := output.Write(header.Bytes()); err != nil {
		return err
	}
	if _, err := output.Write(buf.page); err != nil {
		return err
	}
	// A nil page records header/size stats only, no value statistics.
	c.recordPageStats(int32(header.Len()), pageHeader, nil)
	return nil
}

// writePageToFilter folds the page's values into the column's bloom filter
// using the filter's encoding. The filter slice is grown in place.
// (Receiver renamed from w to c for consistency with the other writerColumn
// methods.)
func (c *writerColumn) writePageToFilter(page Page) (err error) {
	pageType := page.Type()
	pageData := page.Data()
	c.filter, err = pageType.Encode(c.filter, pageData, c.columnFilter.Encoding())
	return err
}

// writePageTo streams a page of the given size into a pooled buffer via the
// writeTo callback, verifies the byte count, rewinds the buffer, and appends
// it to c.pages. Ownership of the buffer transfers to c.pages on success;
// on failure the deferred cleanup returns it to the pool.
func (c *writerColumn) writePageTo(size int64, writeTo func(io.Writer) (int64, error)) error {
	buffer := c.pool.GetBuffer()
	defer func() {
		// buffer is set to nil below once ownership moved to c.pages.
		if buffer != nil {
			c.pool.PutBuffer(buffer)
		}
	}()
	written, err := writeTo(buffer)
	if err != nil {
		return err
	}
	if written != size {
		return fmt.Errorf("writing parquet column page expected %dB but got %dB: %w", size, written, io.ErrShortWrite)
	}
	// Rewind so readers of c.pages start at the beginning of the page.
	offset, err := buffer.Seek(0, io.SeekStart)
	if err != nil {
		return err
	}
	if offset != 0 {
		return fmt.Errorf("resetting parquet page buffer to the start expected offset zero but got %d", offset)
	}
	c.pages, buffer = append(c.pages, buffer), nil
	return nil
}

// makePageStatistics builds the format.Statistics record for a page from its
// null count and min/max bounds.
func (c *writerColumn) makePageStatistics(page Page) format.Statistics {
	numNulls := page.NumNulls()
	minValue, maxValue, _ := page.Bounds()
	minValueBytes := minValue.Bytes()
	maxValueBytes := maxValue.Bytes()
return format.Statistics{
		Min:       minValueBytes, // deprecated field, kept for older readers
		Max:       maxValueBytes, // deprecated field, kept for older readers
		NullCount: numNulls,
		MinValue:  minValueBytes,
		MaxValue:  maxValueBytes,
	}
}

// recordPageStats folds one written page into the column chunk metadata:
// value/null counts, chunk-level min/max bounds, the offset index entry, and
// total (un)compressed sizes plus per-encoding page counts. A nil page (as
// used for dictionary pages) records only sizes and encoding stats.
func (c *writerColumn) recordPageStats(headerSize int32, header *format.PageHeader, page Page) {
	uncompressedSize := headerSize + header.UncompressedPageSize
	compressedSize := headerSize + header.CompressedPageSize

	if page != nil {
		numNulls := page.NumNulls()
		numValues := page.NumValues()
		minValue, maxValue, pageHasBounds := page.Bounds()
		c.columnIndex.IndexPage(numValues, numNulls, minValue, maxValue)
		c.columnChunk.MetaData.NumValues += numValues
		c.columnChunk.MetaData.Statistics.NullCount += numNulls

		if pageHasBounds {
			// Merge the page bounds into the chunk-level min/max. The
			// existing values stay null (zero Value) until the first page
			// with bounds has been recorded.
			var existingMaxValue, existingMinValue Value
			if c.columnChunk.MetaData.Statistics.MaxValue != nil && c.columnChunk.MetaData.Statistics.MinValue != nil {
				existingMaxValue = c.columnType.Kind().Value(c.columnChunk.MetaData.Statistics.MaxValue)
				existingMinValue = c.columnType.Kind().Value(c.columnChunk.MetaData.Statistics.MinValue)
			}
			if existingMaxValue.isNull() || c.columnType.Compare(maxValue, existingMaxValue) > 0 {
				c.columnChunk.MetaData.Statistics.MaxValue = maxValue.Bytes()
			}
			if existingMinValue.isNull() || c.columnType.Compare(minValue, existingMinValue) < 0 {
				c.columnChunk.MetaData.Statistics.MinValue = minValue.Bytes()
			}
		}

		// Offset must be captured before TotalCompressedSize is incremented
		// below: it is the byte offset at which this page begins.
		c.offsetIndex.PageLocations = append(c.offsetIndex.PageLocations, format.PageLocation{
			Offset:             c.columnChunk.MetaData.TotalCompressedSize,
			CompressedPageSize: compressedSize,
			FirstRowIndex:      c.numRows,
		})

		c.numRows += page.NumRows()
	}

	// Extract the value encoding from whichever page header variant is set;
	// -1 marks an unknown page type.
	pageType := header.Type
	encoding := format.Encoding(-1)
	switch pageType {
	case format.DataPageV2:
		encoding = header.DataPageHeaderV2.Encoding
	case format.DataPage:
		encoding = header.DataPageHeader.Encoding
	case format.DictionaryPage:
		encoding = header.DictionaryPageHeader.Encoding
	}

	c.columnChunk.MetaData.TotalUncompressedSize += int64(uncompressedSize)
	c.columnChunk.MetaData.TotalCompressedSize += int64(compressedSize)
	c.columnChunk.MetaData.EncodingStats = addPageEncodingStats(c.columnChunk.MetaData.EncodingStats, format.PageEncodingStats{
		PageType: pageType,
		Encoding: encoding,
		Count:    1,
	})
}

// addEncoding appends add to encodings unless it is already present,
// keeping the slice duplicate-free.
func addEncoding(encodings []format.Encoding, add format.Encoding) []format.Encoding {
	for _, enc := range encodings {
		if enc == add {
			return encodings
		}
	}
	return append(encodings, add)
}

// addPageEncodingStats merges the given page stats into stats, summing counts
// for entries with matching (PageType, Encoding) and appending new ones.
func addPageEncodingStats(stats []format.PageEncodingStats, pages ...format.PageEncodingStats) []format.PageEncodingStats {
addPages: // labeled so the inner loop can advance the outer iteration
	for _, add := range pages {
		for i, st := range stats {
			if st.PageType == add.PageType && st.Encoding == add.Encoding {
				stats[i].Count += add.Count
				continue addPages
			}
		}
		stats = append(stats, add)
	}
	return stats
}

// sortPageEncodings sorts encodings in ascending numeric order.
func sortPageEncodings(encodings []format.Encoding) {
	sort.Slice(encodings, func(i, j int) bool {
		return encodings[i] < encodings[j]
	})
}

// sortPageEncodingStats orders stats by page type, then by encoding.
func sortPageEncodingStats(stats []format.PageEncodingStats) {
	sort.Slice(stats, func(i, j int) bool {
		s1 := &stats[i]
		s2 := &stats[j]
		if s1.PageType != s2.PageType {
			return s1.PageType < s2.PageType
		}
		return s1.Encoding < s2.Encoding
	})
}

// offsetTrackingWriter wraps an io.Writer and tracks the number of bytes
// written so far.
type offsetTrackingWriter struct {
	writer io.Writer // underlying destination
	offset int64     // total bytes written since the last Reset
}

// Reset points the writer at a new destination and zeroes the offset.
func (w *offsetTrackingWriter) Reset(writer io.Writer) {
	w.writer = writer
	w.offset = 0
}

// Write forwards to the underlying writer and advances the offset by the
// number of bytes actually written.
func (w *offsetTrackingWriter) Write(b []byte) (int, error) {
	n, err := w.writer.Write(b)
	w.offset += int64(n)
	return n, err
}

// WriteString implements io.StringWriter, tracking the offset like Write.
func (w *offsetTrackingWriter) WriteString(s string) (int, error) {
	n, err := io.WriteString(w.writer, s)
	w.offset += int64(n)
	return n, err
}

// ReadFrom implements io.ReaderFrom, tracking the offset of the copied bytes.
func (w *offsetTrackingWriter) ReadFrom(r io.Reader) (int64, error) {
	// io.Copy will make use of io.ReaderFrom if w.writer implements it.
n, err := io.Copy(w.writer, r)
	w.offset += n
	return n, err
}

// Compile-time interface conformance checks.
var (
	_ RowWriterWithSchema = (*Writer)(nil)
	_ RowReaderFrom       = (*Writer)(nil)
	_ RowGroupWriter      = (*Writer)(nil)

	_ RowWriter   = (*writer)(nil)
	_ ValueWriter = (*writer)(nil)

	_ ValueWriter = (*writerColumn)(nil)

	_ io.ReaderFrom   = (*offsetTrackingWriter)(nil)
	_ io.StringWriter = (*offsetTrackingWriter)(nil)
)

================================================
FILE: writer_go18.go
================================================
//go:build go1.18

package parquet

import (
	"io"
	"reflect"
)

// GenericWriter is similar to a Writer but uses a type parameter to define the
// Go type representing the schema of rows being written.
//
// Using this type over Writer has multiple advantages:
//
//   - By leveraging type information, the Go compiler can provide greater
//     guarantees that the code is correct. For example, the parquet.Writer.Write
//     method accepts an argument of type interface{}, which delays type checking
//     until runtime. The parquet.GenericWriter[T].Write method ensures at
//     compile time that the values it receives will be of type T, reducing the
//     risk of introducing errors.
//
//   - Since type information is known at compile time, the implementation of
//     parquet.GenericWriter[T] can make safe assumptions, removing the need for
//     runtime validation of how the parameters are passed to its methods.
//     Optimizations relying on type information are more effective, some of the
//     writer's state can be precomputed at initialization, which was not possible
//     with parquet.Writer.
//
//   - The parquet.GenericWriter[T].Write method uses a data-oriented design,
//     accepting a slice of T instead of a single value, creating more
//     opportunities to amortize the runtime cost of abstractions.
//     This optimization is not available for parquet.Writer because its Write
//     method's argument would be of type []interface{}, which would require
//     conversions back and forth from concrete types to empty interfaces (since
//     a []T cannot be interpreted as []interface{} in Go), would make the API
//     more difficult to use and waste compute resources in the type conversions,
//     defeating the purpose of the optimization in the first place.
//
// Note that this type is only available when compiling with Go 1.18 or later.
type GenericWriter[T any] struct {
	// At this time GenericWriter is expressed in terms of Writer to reuse the
	// underlying logic. In the future, and if we accepted to break backward
	// compatibility on the Write method, we could modify Writer to be an alias
	// to GenericWriter with:
	//
	//	type Writer = GenericWriter[any]
	//
	base Writer
	// This function writes rows of type T to the writer, it gets generated by
	// the NewGenericWriter function based on the type T and the underlying
	// schema of the parquet file.
	write writeFunc[T]
	// This field is used to leverage the optimized writeRowsFunc algorithms.
	columns []ColumnBuffer
}

// NewGenericWriter is like NewWriter but returns a GenericWriter[T] suited to
// write rows of Go type T.
//
// The type parameter T should be a map, struct, or any. Any other types will
// cause a panic at runtime. Type checking is a lot more effective when the
// generic parameter is a struct type, using map and interface types is somewhat
// similar to using a Writer.
//
// The option list may explicitly declare a schema, in which case it must be
// compatible with the schema generated from T.
//
// Sorting columns may be set on the writer to configure the generated row
// groups metadata. However, rows are always written in the order they were
// seen, no reordering is performed, the writer expects the application to
// ensure proper correlation between the order of rows and the list of sorting
// columns.
// See SortingWriter[T] for a writer which handles reordering rows
// based on the configured sorting columns.
func NewGenericWriter[T any](output io.Writer, options ...WriterOption) *GenericWriter[T] {
	config, err := NewWriterConfig(options...)
	if err != nil {
		panic(err)
	}

	schema := config.Schema
	t := typeOf[T]()

	// Derive the schema from T when none was provided via options; t is nil
	// when T is an interface type, in which case a schema option is required.
	if schema == nil && t != nil {
		schema = schemaOf(dereference(t))
		config.Schema = schema
	}

	if config.Schema == nil {
		panic("generic writer must be instantiated with schema or concrete type.")
	}

	return &GenericWriter[T]{
		base: Writer{
			output: output,
			config: config,
			schema: schema,
			writer: newWriter(output, config),
		},
		write: writeFuncOf[T](t, config.Schema),
	}
}

// writeFunc is the signature of the specialized row-writing routine generated
// for each GenericWriter[T] instantiation.
type writeFunc[T any] func(*GenericWriter[T], []T) (int, error)

// writeFuncOf selects the row-writing strategy for T: writeAny for interface
// types (t == nil), the generic writeRows path for maps/interfaces, and the
// optimized makeWriteFunc path for structs and pointers to structs.
func writeFuncOf[T any](t reflect.Type, schema *Schema) writeFunc[T] {
	if t == nil {
		return (*GenericWriter[T]).writeAny
	}
	switch t.Kind() {
	case reflect.Interface, reflect.Map:
		return (*GenericWriter[T]).writeRows

	case reflect.Struct:
		return makeWriteFunc[T](t, schema)

	case reflect.Pointer:
		if e := t.Elem(); e.Kind() == reflect.Struct {
			return makeWriteFunc[T](t, schema)
		}
	}
	panic("cannot create writer for values of type " + t.String())
}

// makeWriteFunc builds a write function that bypasses Row deconstruction and
// writes struct values directly into the column buffers.
func makeWriteFunc[T any](t reflect.Type, schema *Schema) writeFunc[T] {
	writeRows := writeRowsFuncOf(t, schema, nil)
	return func(w *GenericWriter[T], rows []T) (n int, err error) {
		if w.columns == nil {
			w.columns = make([]ColumnBuffer, len(w.base.writer.columns))
			for i, c := range w.base.writer.columns {
				// These fields are usually lazily initialized when writing rows,
				// we need them to exist now though.
				c.columnBuffer = c.newColumnBuffer()
				w.columns[i] = c.columnBuffer
			}
		}
		err = writeRows(w.columns, makeArrayOf(rows), columnLevels{})
		if err == nil {
			n = len(rows)
		}
		return n, err
	}
}

// Close flushes remaining buffered rows and writes the parquet footer.
func (w *GenericWriter[T]) Close() error {
	return w.base.Close()
}

// Flush writes buffered rows as a new row group.
func (w *GenericWriter[T]) Flush() error {
	return w.base.Flush()
}

// Reset clears the writer state and directs future writes to output.
func (w *GenericWriter[T]) Reset(output io.Writer) {
	w.base.Reset(output)
}

// Write writes the given rows, returning the number of rows written. Columns
// whose buffers exceed their configured size are flushed after each batch.
func (w *GenericWriter[T]) Write(rows []T) (int, error) {
	return w.base.writer.writeRows(len(rows), func(i, j int) (int, error) {
		// rows[i:j:j] caps the slice so the callee cannot grow into the
		// caller's backing array.
		n, err := w.write(w, rows[i:j:j])
		if err != nil {
			return n, err
		}

		for _, c := range w.base.writer.columns {
			if c.columnBuffer.Size() >= int64(c.bufferSize) {
				if err := c.flush(); err != nil {
					return n, err
				}
			}
		}

		return n, nil
	})
}

// WriteRows writes rows in their generic Row form.
func (w *GenericWriter[T]) WriteRows(rows []Row) (int, error) {
	return w.base.WriteRows(rows)
}

func (w *GenericWriter[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) {
	return w.base.WriteRowGroup(rowGroup)
}

// SetKeyValueMetadata sets a key/value pair in the Parquet file metadata.
//
// Keys are assumed to be unique, if the same key is repeated multiple times the
// last value is retained. While the parquet format does not require unique keys,
// this design decision was made to optimize for the most common use case where
// applications leverage this extension mechanism to associate single values to
// keys. This may create incompatibilities with other parquet libraries, or may
// cause some key/value pairs to be lost when open parquet files written with
// repeated keys. We can revisit this decision if it ever becomes a blocker.
func (w *GenericWriter[T]) SetKeyValueMetadata(key, value string) {
	w.base.SetKeyValueMetadata(key, value)
}

// ReadRowsFrom copies rows from the given reader into the writer.
func (w *GenericWriter[T]) ReadRowsFrom(rows RowReader) (int64, error) {
	return w.base.ReadRowsFrom(rows)
}

// Schema returns the schema the writer was configured with.
func (w *GenericWriter[T]) Schema() *Schema {
	return w.base.Schema()
}

// writeRows is the fallback write path used for map and interface types:
// each value is deconstructed into a Row before being written.
func (w *GenericWriter[T]) writeRows(rows []T) (int, error) {
	// Grow (or shrink) the shared row buffer to match the batch size.
	if cap(w.base.rowbuf) < len(rows) {
		w.base.rowbuf = make([]Row, len(rows))
	} else {
		w.base.rowbuf = w.base.rowbuf[:len(rows)]
	}
	// Release references held by the row buffer once the batch is written.
	defer clearRows(w.base.rowbuf)

	schema := w.base.Schema()
	for i := range rows {
		w.base.rowbuf[i] = schema.Deconstruct(w.base.rowbuf[i], &rows[i])
	}

	return w.base.WriteRows(w.base.rowbuf)
}

// writeAny writes rows one at a time through the untyped Writer.Write path;
// used when T is an interface type.
func (w *GenericWriter[T]) writeAny(rows []T) (n int, err error) {
	for i := range rows {
		if err = w.base.Write(rows[i]); err != nil {
			return n, err
		}
		n++
	}
	return n, nil
}

// Compile-time interface conformance checks for representative
// instantiations of GenericWriter.
var (
	_ RowWriterWithSchema = (*GenericWriter[any])(nil)
	_ RowReaderFrom       = (*GenericWriter[any])(nil)
	_ RowGroupWriter      = (*GenericWriter[any])(nil)

	_ RowWriterWithSchema = (*GenericWriter[struct{}])(nil)
	_ RowReaderFrom       = (*GenericWriter[struct{}])(nil)
	_ RowGroupWriter      = (*GenericWriter[struct{}])(nil)

	_ RowWriterWithSchema = (*GenericWriter[map[struct{}]struct{}])(nil)
	_ RowReaderFrom       = (*GenericWriter[map[struct{}]struct{}])(nil)
	_ RowGroupWriter      = (*GenericWriter[map[struct{}]struct{}])(nil)
)

================================================
FILE: writer_go18_test.go
================================================
//go:build go1.18

package parquet_test

import (
	"bytes"
	"io"
	"math/rand"
	"reflect"
	"testing"

	"github.com/segmentio/parquet-go"
)

// BenchmarkGenericWriter runs the generic writer benchmark for every column
// type exercised by the test suite.
func BenchmarkGenericWriter(b *testing.B) {
	benchmarkGenericWriter[benchmarkRowType](b)
	benchmarkGenericWriter[booleanColumn](b)
	benchmarkGenericWriter[int32Column](b)
	benchmarkGenericWriter[int64Column](b)
	benchmarkGenericWriter[floatColumn](b)
	benchmarkGenericWriter[doubleColumn](b)
	benchmarkGenericWriter[byteArrayColumn](b)
	benchmarkGenericWriter[fixedLenByteArrayColumn](b)
benchmarkGenericWriter[stringColumn](b)
	benchmarkGenericWriter[indexedStringColumn](b)
	benchmarkGenericWriter[uuidColumn](b)
	benchmarkGenericWriter[timeColumn](b)
	benchmarkGenericWriter[timeInMillisColumn](b)
	benchmarkGenericWriter[mapColumn](b)
	benchmarkGenericWriter[decimalColumn](b)
	benchmarkGenericWriter[contact](b)
	benchmarkGenericWriter[paddedBooleanColumn](b)
	benchmarkGenericWriter[optionalInt32Column](b)
	benchmarkGenericWriter[repeatedInt32Column](b)
}

// benchmarkGenericWriter compares the reflection-based Writer ("go1.17")
// against the generics-based GenericWriter ("go1.18") on the same generated
// row set.
func benchmarkGenericWriter[Row generator[Row]](b *testing.B) {
	var model Row
	b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) {
		// Fixed seed keeps the generated rows deterministic across runs.
		prng := rand.New(rand.NewSource(0))
		rows := make([]Row, benchmarkNumRows)
		for i := range rows {
			rows[i] = rows[i].generate(prng)
		}

		b.Run("go1.17", func(b *testing.B) {
			writer := parquet.NewWriter(io.Discard, parquet.SchemaOf(rows[0]))
			i := 0
			benchmarkRowsPerSecond(b, func() int {
				for j := 0; j < benchmarkRowsPerStep; j++ {
					if err := writer.Write(&rows[i]); err != nil {
						b.Fatal(err)
					}
				}

				i += benchmarkRowsPerStep
				i %= benchmarkNumRows

				// Restart the writer each time the row set wraps around.
				if i == 0 {
					writer.Close()
					writer.Reset(io.Discard)
				}
				return benchmarkRowsPerStep
			})
		})

		b.Run("go1.18", func(b *testing.B) {
			writer := parquet.NewGenericWriter[Row](io.Discard)
			i := 0
			benchmarkRowsPerSecond(b, func() int {
				n, err := writer.Write(rows[i : i+benchmarkRowsPerStep])
				if err != nil {
					b.Fatal(err)
				}

				i += benchmarkRowsPerStep
				i %= benchmarkNumRows

				if i == 0 {
					writer.Close()
					writer.Reset(io.Discard)
				}
				return n
			})
		})
	})
}

// TestIssue272 checks definition levels of nested optional struct pointers
// (regression test; see project issue 272).
func TestIssue272(t *testing.T) {
	type T2 struct {
		X string `parquet:",dict,optional"`
	}

	type T1 struct {
		TA *T2
		TB *T2
	}

	type T struct {
		T1 *T1
	}

	const nRows = 1

	row := T{
		T1: &T1{
			TA: &T2{
				X: "abc",
			},
		},
	}

	rows := make([]T, nRows)
	for i := range rows {
		rows[i] = row
	}

	b := new(bytes.Buffer)
	w := parquet.NewGenericWriter[T](b)

	if _, err := w.Write(rows); err != nil {
		t.Fatal(err)
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	f := bytes.NewReader(b.Bytes())
	r := parquet.NewGenericReader[T](f)

	parquetRows := make([]parquet.Row, nRows)
	n, err := r.ReadRows(parquetRows)
	if err != nil && err != io.EOF {
		t.Fatal(err)
	}
	if n != nRows {
		t.Fatalf("wrong number of rows read: want=%d got=%d", nRows, n)
	}
	for _, r := range parquetRows {
		// Column 0 (T1.TA.X) is fully populated: definition level 3.
		if d := r[0].DefinitionLevel(); d != 3 {
			t.Errorf("wrong definition level for column 0: %d", d)
		}
		// Column 1 (T1.TB.X) is nil at TB: definition level 1.
		if d := r[1].DefinitionLevel(); d != 1 {
			t.Errorf("wrong definition level for column 1: %d", d)
		}
	}
}

// TestIssue279 checks definition levels of optional fields inside a repeated
// group of struct pointers (regression test; see project issue 279).
func TestIssue279(t *testing.T) {
	type T2 struct {
		Id   int    `parquet:",plain,optional"`
		Name string `parquet:",plain,optional"`
	}

	type T1 struct {
		TA []*T2
	}

	type T struct {
		T1 *T1
	}

	const nRows = 1

	row := T{
		T1: &T1{
			TA: []*T2{
				{
					Id:   43,
					Name: "john",
				},
			},
		},
	}

	rows := make([]T, nRows)
	for i := range rows {
		rows[i] = row
	}

	b := new(bytes.Buffer)
	w := parquet.NewGenericWriter[T](b)

	if _, err := w.Write(rows); err != nil {
		t.Fatal(err)
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	f := bytes.NewReader(b.Bytes())
	r := parquet.NewGenericReader[T](f)

	parquetRows := make([]parquet.Row, nRows)
	n, err := r.ReadRows(parquetRows)
	if err != nil && err != io.EOF {
		t.Fatal(err)
	}
	if n != nRows {
		t.Fatalf("wrong number of rows read: want=%d got=%d", nRows, n)
	}
	for _, r := range parquetRows {
		// Both leaf columns are populated: definition level 3.
		if d := r[0].DefinitionLevel(); d != 3 {
			t.Errorf("wrong definition level for column 0: %d", d)
		}
		if d := r[1].DefinitionLevel(); d != 3 {
			t.Errorf("wrong definition level for column 1: %d", d)
		}
	}
}

// TestIssue302 checks that map-typed struct fields with various tags can be
// used to instantiate a GenericWriter, and that optional maps round-trip
// (regression test; see project issue 302).
func TestIssue302(t *testing.T) {
	tests := []struct {
		name string
		fn   func(t *testing.T)
	}{
		{
			name: "SimpleMap",
			fn: func(t *testing.T) {
				type M map[string]int

				type T struct {
					M M `parquet:","`
				}

				b := new(bytes.Buffer)
				_ = parquet.NewGenericWriter[T](b)
			},
		},

		{
			name: "MapWithValueTag",
			fn: func(t *testing.T) {
				type M map[string]int

				type T struct {
					M M `parquet:"," parquet-value:",zstd"`
				}

				b := new(bytes.Buffer)
				_ = parquet.NewGenericWriter[T](b)
			},
		},

		{
			name: "MapWithOptionalTag",
			fn: func(t *testing.T) {
				type M map[string]int

				type T struct {
					M M `parquet:",optional"`
				}

				b :=
new(bytes.Buffer)
				w := parquet.NewGenericWriter[T](b)
				expect := []T{
					{
						M: M{
							"Holden": 1,
							"Naomi":  2,
						},
					},
					{
						// A nil optional map must round-trip as nil.
						M: nil,
					},
					{
						M: M{
							"Naomi":  1,
							"Holden": 2,
						},
					},
				}
				_, err := w.Write(expect)
				if err != nil {
					t.Fatal(err)
				}
				if err = w.Close(); err != nil {
					t.Fatal(err)
				}

				bufReader := bytes.NewReader(b.Bytes())
				r := parquet.NewGenericReader[T](bufReader)
				values := make([]T, 3)
				_, err = r.Read(values)
				if !reflect.DeepEqual(expect, values) {
					t.Fatalf("values do not match.\n\texpect: %v\n\tactual: %v", expect, values)
				}
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, test.fn)
	}
}

// TestIssue347Writer checks the schema requirements of NewGenericWriter:
// concrete types and schema-supplied interface types work, while an interface
// type without a schema must panic (regression test; see project issue 347).
func TestIssue347Writer(t *testing.T) {
	type TestType struct {
		Key int
	}

	b := new(bytes.Buffer)
	// instantiating with concrete type shouldn't panic
	_ = parquet.NewGenericWriter[TestType](b)

	// instantiating with schema and interface type parameter shouldn't panic
	schema := parquet.SchemaOf(TestType{})
	_ = parquet.NewGenericWriter[any](b, schema)

	defer func() {
		if r := recover(); r == nil {
			t.Errorf("instantiating generic buffer without schema and with interface " +
				"type parameter should panic")
		}
	}()
	_ = parquet.NewGenericWriter[any](b)
}

// TestIssue375 checks that MaxRowsPerRowGroup splits 100 rows into 10 row
// groups (regression test; see project issue 375).
func TestIssue375(t *testing.T) {
	type Row struct{ FirstName, LastName string }

	output := new(bytes.Buffer)
	writer := parquet.NewGenericWriter[Row](output, parquet.MaxRowsPerRowGroup(10))

	rows := make([]Row, 100)
	for i := range rows {
		rows[i] = Row{
			FirstName: "0123456789"[i%10 : i%10+1],
			LastName:  "foo",
		}
	}

	n, err := writer.Write(rows)
	if err != nil {
		t.Fatal(err)
	}
	if n != len(rows) {
		t.Fatal("wrong number of rows written:", n)
	}

	if err := writer.Close(); err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(output.Bytes()), int64(output.Len()))
	if err != nil {
		t.Fatal(err)
	}

	rowGroups := f.RowGroups()
	if len(rowGroups) != 10 {
		t.Errorf("wrong number of row groups in parquet file: want=10 got=%d", len(rowGroups))
	}
}

// TestGenericSetKeyValueMetadata checks that a key/value pair set on a
// GenericWriter can be looked up in the resulting file's metadata.
func TestGenericSetKeyValueMetadata(t *testing.T) {
	testKey := "test-key"
	testValue := "test-value"

	type Row struct{ FirstName, LastName string }

	output := new(bytes.Buffer)
	writer := parquet.NewGenericWriter[Row](output, parquet.MaxRowsPerRowGroup(10))
	rows := []Row{
		{FirstName: "First", LastName: "Last"},
	}

	_, err := writer.Write(rows)
	if err != nil {
		t.Fatal(err)
	}

	writer.SetKeyValueMetadata(testKey, testValue)

	err = writer.Close()
	if err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(output.Bytes()), int64(output.Len()))
	if err != nil {
		t.Fatal(err)
	}

	value, ok := f.Lookup(testKey)
	if !ok {
		t.Fatalf("key/value metadata should have included %q", testKey)
	}
	if value != testValue {
		t.Errorf("expected %q, got %q", testValue, value)
	}
}

================================================
FILE: writer_test.go
================================================
package parquet_test

import (
	"bytes"
	"fmt"
	"os"
	"os/exec"
	"strings"
	"testing"

	"github.com/google/uuid"
	"github.com/hexops/gotextdiff"
	"github.com/hexops/gotextdiff/myers"
	"github.com/hexops/gotextdiff/span"
	"github.com/segmentio/parquet-go"
	"github.com/segmentio/parquet-go/compress"
)

// Data page format versions exercised by the writer tests.
const (
	v1 = 1
	v2 = 2
)

// scanParquetFile opens the file as a parquet file and prints every value.
func scanParquetFile(f *os.File) error {
	s, err := f.Stat()
	if err != nil {
		return err
	}

	p, err := parquet.OpenFile(f, s.Size())
	if err != nil {
		return err
	}

	return scanParquetValues(p.Root())
}

// scanParquetValues prints "path > value" for each value under col.
func scanParquetValues(col *parquet.Column) error {
	return forEachColumnValue(col, func(leaf *parquet.Column, value parquet.Value) error {
		fmt.Printf("%s > %+v\n", strings.Join(leaf.Path(), "."), value)
		return nil
	})
}

// generateParquetFile writes the given rows to a temporary parquet file,
// scans it, and returns the output of the external parquet-tools dump.
func generateParquetFile(rows rows, options ...parquet.WriterOption) ([]byte, error) {
	tmp, err := os.CreateTemp("/tmp", "*.parquet")
	if err != nil {
		return nil, err
	}
	defer tmp.Close()
	path := tmp.Name()
	defer os.Remove(path)

	// fmt.Println(path)

	// A small page buffer forces multiple pages per column in the fixtures.
	writerOptions := []parquet.WriterOption{parquet.PageBufferSize(20)}
	writerOptions = append(writerOptions, options...)
if err := writeParquetFile(tmp, rows, writerOptions...); err != nil { return nil, err } if err := scanParquetFile(tmp); err != nil { return nil, err } return parquetTools("dump", path) } type firstAndLastName struct { FirstName string `parquet:"first_name,dict,zstd"` LastName string `parquet:"last_name,delta,zstd"` } type timeseries struct { Name string `parquet:"name,dict"` Timestamp int64 `parquet:"timestamp,delta"` Value float64 `parquet:"value"` } type event struct { Name string `parquet:"name,dict"` Type string `parquet:"-"` Value float64 `parquet:"value"` Category string `parquet:"-"` } var writerTests = []struct { scenario string version int codec compress.Codec rows []interface{} dump string }{ { scenario: "page v1 with dictionary encoding", version: v1, rows: []interface{}{ &firstAndLastName{FirstName: "Han", LastName: "Solo"}, &firstAndLastName{FirstName: "Leia", LastName: "Skywalker"}, &firstAndLastName{FirstName: "Luke", LastName: "Skywalker"}, }, dump: `row group 0 -------------------------------------------------------------------------------- first_name: BINARY ZSTD DO:4 FPO:55 SZ:90/72/0.80 VC:3 ENC:RLE_DICTIONARY,PLAIN ST:[min: Han, max: Luke, num_nulls not defined] last_name: BINARY ZSTD DO:0 FPO:94 SZ:127/121/0.95 VC:3 ENC:DELTA_BYTE_ARRAY ST:[min: Skywalker, max: Solo, num_nulls not defined] first_name TV=3 RL=0 DL=0 DS: 3 DE:PLAIN ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:7 VC:3 last_name TV=3 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:56 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:19 VC:1 BINARY first_name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 
*** value 1: R:0 D:0 V:Han value 2: R:0 D:0 V:Leia value 3: R:0 D:0 V:Luke BINARY last_name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:0 V:Solo value 2: R:0 D:0 V:Skywalker value 3: R:0 D:0 V:Skywalker `, }, { // same as the previous test but uses page v2 where data pages aren't compressed scenario: "page v2 with dictionary encoding", version: v2, rows: []interface{}{ &firstAndLastName{FirstName: "Han", LastName: "Solo"}, &firstAndLastName{FirstName: "Leia", LastName: "Skywalker"}, &firstAndLastName{FirstName: "Luke", LastName: "Skywalker"}, }, dump: `row group 0 -------------------------------------------------------------------------------- first_name: BINARY ZSTD DO:4 FPO:55 SZ:86/77/0.90 VC:3 ENC:PLAIN,RLE_DICTIONARY ST:[min: Han, max: Luke, num_nulls not defined] last_name: BINARY ZSTD DO:0 FPO:90 SZ:137/131/0.96 VC:3 ENC:DELTA_BYTE_ARRAY ST:[min: Skywalker, max: Solo, num_nulls not defined] first_name TV=3 RL=0 DL=0 DS: 3 DE:PLAIN ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[no stats for this column] SZ:7 VC:3 last_name TV=3 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_BYTE_ARRAY ST:[no stats for this column] SZ:56 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_BYTE_ARRAY ST:[no stats for this column] SZ:19 VC:1 BINARY first_name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:0 V:Han value 2: R:0 D:0 V:Leia value 3: R:0 D:0 V:Luke BINARY last_name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:0 V:Solo value 2: R:0 D:0 V:Skywalker value 3: R:0 D:0 V:Skywalker `, }, { scenario: "timeseries with delta encoding", version: v2, codec: &parquet.Gzip, 
rows: []interface{}{ timeseries{Name: "http_request_total", Timestamp: 1639444033, Value: 100}, timeseries{Name: "http_request_total", Timestamp: 1639444058, Value: 0}, timeseries{Name: "http_request_total", Timestamp: 1639444085, Value: 42}, timeseries{Name: "http_request_total", Timestamp: 1639444093, Value: 1}, timeseries{Name: "http_request_total", Timestamp: 1639444101, Value: 2}, timeseries{Name: "http_request_total", Timestamp: 1639444108, Value: 5}, timeseries{Name: "http_request_total", Timestamp: 1639444133, Value: 4}, timeseries{Name: "http_request_total", Timestamp: 1639444137, Value: 5}, timeseries{Name: "http_request_total", Timestamp: 1639444141, Value: 6}, timeseries{Name: "http_request_total", Timestamp: 1639444144, Value: 10}, }, dump: `row group 0 -------------------------------------------------------------------------------- name: BINARY GZIP DO:4 FPO:70 SZ:126/101/0.80 VC:10 ENC:PLAIN,RLE_DICTIONARY ST:[min: http_request_total, max: http_request_total, num_nulls not defined] timestamp: INT64 GZIP DO:0 FPO:130 SZ:299/550/1.84 VC:10 ENC:DELTA_BINARY_PACKED ST:[min: 1639444033, max: 1639444144, num_nulls not defined] value: DOUBLE GZIP DO:0 FPO:429 SZ:292/192/0.66 VC:10 ENC:PLAIN ST:[min: -0.0, max: 100.0, num_nulls not defined] name TV=10 RL=0 DL=0 DS: 1 DE:PLAIN ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[no stats for this column] SZ:2 VC:5 page 1: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[no stats for this column] SZ:2 VC:5 timestamp TV=10 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_BINARY_PACKED ST:[no stats for this column] SZ:142 VC:3 page 1: DLE:RLE RLE:RLE VLE:DELTA_BINARY_PACKED ST:[no stats for this column] SZ:142 VC:3 page 2: DLE:RLE RLE:RLE VLE:DELTA_BINARY_PACKED ST:[no stats for this column] SZ:142 VC:3 page 3: DLE:RLE RLE:RLE VLE:DELTA_BINARY_PACKED ST:[no stats for this column] 
SZ:9 VC:1 value TV=10 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3 page 1: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3 page 2: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:24 VC:3 page 3: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] SZ:8 VC:1 BINARY name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 10 *** value 1: R:0 D:0 V:http_request_total value 2: R:0 D:0 V:http_request_total value 3: R:0 D:0 V:http_request_total value 4: R:0 D:0 V:http_request_total value 5: R:0 D:0 V:http_request_total value 6: R:0 D:0 V:http_request_total value 7: R:0 D:0 V:http_request_total value 8: R:0 D:0 V:http_request_total value 9: R:0 D:0 V:http_request_total value 10: R:0 D:0 V:http_request_total INT64 timestamp -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 10 *** value 1: R:0 D:0 V:1639444033 value 2: R:0 D:0 V:1639444058 value 3: R:0 D:0 V:1639444085 value 4: R:0 D:0 V:1639444093 value 5: R:0 D:0 V:1639444101 value 6: R:0 D:0 V:1639444108 value 7: R:0 D:0 V:1639444133 value 8: R:0 D:0 V:1639444137 value 9: R:0 D:0 V:1639444141 value 10: R:0 D:0 V:1639444144 DOUBLE value -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 10 *** value 1: R:0 D:0 V:100.0 value 2: R:0 D:0 V:0.0 value 3: R:0 D:0 V:42.0 value 4: R:0 D:0 V:1.0 value 5: R:0 D:0 V:2.0 value 6: R:0 D:0 V:5.0 value 7: R:0 D:0 V:4.0 value 8: R:0 D:0 V:5.0 value 9: R:0 D:0 V:6.0 value 10: R:0 D:0 V:10.0 `, }, { scenario: "example from the twitter blog (v1)", version: v1, rows: []interface{}{ AddressBook{ Owner: "Julien Le Dem", OwnerPhoneNumbers: []string{ "555 123 4567", "555 666 1337", }, Contacts: []Contact{ { Name: "Dmitriy Ryaboy", PhoneNumber: "555 987 6543", 
}, { Name: "Chris Aniszczyk", }, }, }, AddressBook{ Owner: "A. Nonymous", OwnerPhoneNumbers: nil, }, }, dump: `row group 0 -------------------------------------------------------------------------------- owner: BINARY ZSTD DO:0 FPO:4 SZ:81/73/0.90 VC:2 ENC:DELTA_LENGTH_BYTE_ARRAY ST:[min: A. Nonymous, max: Julien Le Dem, num_nulls not defined] ownerPhoneNumbers: BINARY GZIP DO:0 FPO:85 SZ:179/129/0.72 VC:3 ENC:RLE,DELTA_LENGTH_BYTE_ARRAY ST:[min: 555 123 4567, max: 555 666 1337, num_nulls: 1] contacts: .name: BINARY UNCOMPRESSED DO:0 FPO:264 SZ:138/138/1.00 VC:3 ENC:RLE,DELTA_LENGTH_BYTE_ARRAY ST:[min: Chris Aniszczyk, max: Dmitriy Ryaboy, num_nulls: 1] .phoneNumber: BINARY ZSTD DO:0 FPO:402 SZ:113/95/0.84 VC:3 ENC:RLE,DELTA_LENGTH_BYTE_ARRAY ST:[min: 555 987 6543, max: 555 987 6543, num_nulls: 2] owner TV=2 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:50 VC:2 ownerPhoneNumbers TV=3 RL=1 DL=1 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:64 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:17 VC:1 contacts.name TV=3 RL=1 DL=1 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[verified] SZ:73 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[verified] SZ:17 VC:1 contacts.phoneNumber TV=3 RL=1 DL=2 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE CORRUPT] SZ:33 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] CRC:[PAGE 
CORRUPT] SZ:17 VC:1 BINARY owner -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 2 *** value 1: R:0 D:0 V:Julien Le Dem value 2: R:0 D:0 V:A. Nonymous BINARY ownerPhoneNumbers -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:1 V:555 123 4567 value 2: R:1 D:1 V:555 666 1337 value 3: R:0 D:0 V: BINARY contacts.name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:1 V:Dmitriy Ryaboy value 2: R:1 D:1 V:Chris Aniszczyk value 3: R:0 D:0 V: BINARY contacts.phoneNumber -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:2 V:555 987 6543 value 2: R:1 D:1 V: value 3: R:0 D:0 V: `, }, { scenario: "example from the twitter blog (v2)", version: v2, rows: []interface{}{ AddressBook{ Owner: "Julien Le Dem", OwnerPhoneNumbers: []string{ "555 123 4567", "555 666 1337", }, Contacts: []Contact{ { Name: "Dmitriy Ryaboy", PhoneNumber: "555 987 6543", }, { Name: "Chris Aniszczyk", }, }, }, AddressBook{ Owner: "A. Nonymous", OwnerPhoneNumbers: nil, }, }, dump: `row group 0 -------------------------------------------------------------------------------- owner: BINARY ZSTD DO:0 FPO:4 SZ:86/78/0.91 VC:2 ENC:DELTA_LENGTH_BYTE_ARRAY ST:[min: A. 
Nonymous, max: Julien Le Dem, num_nulls not defined] ownerPhoneNumbers: BINARY GZIP DO:0 FPO:90 SZ:172/122/0.71 VC:3 ENC:RLE,DELTA_LENGTH_BYTE_ARRAY ST:[min: 555 123 4567, max: 555 666 1337, num_nulls: 1] contacts: .name: BINARY UNCOMPRESSED DO:0 FPO:262 SZ:132/132/1.00 VC:3 ENC:RLE,DELTA_LENGTH_BYTE_ARRAY ST:[min: Chris Aniszczyk, max: Dmitriy Ryaboy, num_nulls: 1] .phoneNumber: BINARY ZSTD DO:0 FPO:394 SZ:108/90/0.83 VC:3 ENC:RLE,DELTA_LENGTH_BYTE_ARRAY ST:[min: 555 987 6543, max: 555 987 6543, num_nulls: 2] owner TV=2 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:50 VC:2 ownerPhoneNumbers TV=3 RL=1 DL=1 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:56 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:9 VC:1 contacts.name TV=3 RL=1 DL=1 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:65 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:9 VC:1 contacts.phoneNumber TV=3 RL=1 DL=2 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:25 VC:2 page 1: DLE:RLE RLE:RLE VLE:DELTA_LENGTH_BYTE_ARRAY ST:[no stats for this column] SZ:9 VC:1 BINARY owner -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 2 *** value 1: R:0 D:0 V:Julien Le Dem value 2: R:0 D:0 V:A. 
Nonymous BINARY ownerPhoneNumbers -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:1 V:555 123 4567 value 2: R:1 D:1 V:555 666 1337 value 3: R:0 D:0 V: BINARY contacts.name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:1 V:Dmitriy Ryaboy value 2: R:1 D:1 V:Chris Aniszczyk value 3: R:0 D:0 V: BINARY contacts.phoneNumber -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 3 *** value 1: R:0 D:2 V:555 987 6543 value 2: R:1 D:1 V: value 3: R:0 D:0 V: `, }, { scenario: "omit `-` fields", version: v1, rows: []interface{}{ &event{Name: "customer1", Type: "request", Value: 42.0}, &event{Name: "customer2", Type: "access", Value: 1.0}, }, dump: `row group 0 -------------------------------------------------------------------------------- name: BINARY UNCOMPRESSED DO:4 FPO:49 SZ:73/73/1.00 VC:2 ENC:RLE_DICTIONARY,PLAIN ST:[min: customer1, max: customer2, num_nulls not defined] value: DOUBLE UNCOMPRESSED DO:0 FPO:77 SZ:39/39/1.00 VC:2 ENC:PLAIN ST:[min: 1.0, max: 42.0, num_nulls not defined] name TV=2 RL=0 DL=0 DS: 2 DE:PLAIN ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:RLE_DICTIONARY ST:[no stats for this column] CRC:[verified] SZ:5 VC:2 value TV=2 RL=0 DL=0 ---------------------------------------------------------------------------- page 0: DLE:RLE RLE:RLE VLE:PLAIN ST:[no stats for this column] CRC:[verified] SZ:16 VC:2 BINARY name -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 2 *** value 1: R:0 D:0 V:customer1 value 2: R:0 D:0 V:customer2 DOUBLE value -------------------------------------------------------------------------------- *** row group 1 of 1, values 1 to 2 *** value 1: R:0 D:0 V:42.0 value 2: R:0 D:0 
V:1.0 `,
	},
}

// TestWriter runs every writerTests scenario: it generates a parquet file from
// the scenario's rows (with the configured data page version and compression
// codec) and compares the parquet-tools dump of the result against the
// expected `dump` string, reporting a unified diff on mismatch.
//
// The whole test is skipped when the parquet-tools binary is not installed
// (e.g. in Github CI).
func TestWriter(t *testing.T) {
	if !hasParquetTools() {
		t.Skip("Skipping TestWriter writerTests because parquet-tools are not installed in Github CI. FIXME.") // TODO
	}

	for _, test := range writerTests {
		// Copy the fields used by the subtest closure into locals so the
		// parallel subtests do not share the loop variable.
		dataPageVersion := test.version
		codec := test.codec
		rows := test.rows
		dump := test.dump

		t.Run(test.scenario, func(t *testing.T) {
			t.Parallel()

			b, err := generateParquetFile(makeRows(rows),
				parquet.DataPageVersion(dataPageVersion),
				parquet.Compression(codec),
			)
			if err != nil {
				// Log whatever partial output we got before failing; it helps
				// diagnose generation errors.
				t.Logf("\n%s", string(b))
				t.Fatal(err)
			}

			if string(b) != dump {
				edits := myers.ComputeEdits(span.URIFromPath("want.txt"), dump, string(b))
				diff := fmt.Sprint(gotextdiff.ToUnified("want.txt", "got.txt", dump, edits))
				t.Errorf("\n%s", diff)
			}
		})
	}
}

// hasParquetTools reports whether the parquet-tools binary can be found in
// the PATH.
func hasParquetTools() bool {
	_, err := exec.LookPath("parquet-tools")
	return err == nil
}

// parquetTools invokes `parquet-tools <cmd> --debug --disable-crop <path>`
// and returns its combined stdout/stderr with trailing spaces stripped from
// each line. On command failure the raw output is returned along with the
// error so callers can surface it.
func parquetTools(cmd, path string) ([]byte, error) {
	p := exec.Command("parquet-tools", cmd, "--debug", "--disable-crop", path)

	output, err := p.CombinedOutput()
	if err != nil {
		return output, err
	}

	// parquet-tools has trailing spaces on some lines
	lines := bytes.Split(output, []byte("\n"))
	for i, line := range lines {
		lines[i] = bytes.TrimRight(line, " ")
	}
	return bytes.Join(lines, []byte("\n")), nil
}

// TestWriterGenerateBloomFilters property-tests bloom filter generation: for
// random sets of rows, only the configured "last_name" column must carry a
// bloom filter, and that filter must report every written value as present.
func TestWriterGenerateBloomFilters(t *testing.T) {
	type Person struct {
		FirstName utf8string `parquet:"first_name"`
		LastName  utf8string `parquet:"last_name"`
	}

	err := quickCheck(func(rows []Person) bool {
		if len(rows) == 0 { // TODO: support writing files with no rows
			return true
		}

		buffer := new(bytes.Buffer)
		writer := parquet.NewWriter(buffer,
			parquet.BloomFilters(
				parquet.SplitBlockFilter(10, "last_name"),
			),
		)
		for i := range rows {
			if err := writer.Write(&rows[i]); err != nil {
				t.Error(err)
				return false
			}
		}
		if err := writer.Close(); err != nil {
			t.Error(err)
			return false
		}

		reader := bytes.NewReader(buffer.Bytes())
		f, err := parquet.OpenFile(reader, reader.Size())
		if err != nil {
			t.Error(err)
			return false
		}
		rowGroup := f.RowGroups()[0]
		columns := rowGroup.ColumnChunks()
		firstName := columns[0]
		lastName := columns[1]

		// No filter was configured for "first_name", so none must exist.
		if firstName.BloomFilter() != nil {
			t.Errorf(`"first_name" column has a bloom filter even though none were configured`)
			return false
		}

		bloomFilter := lastName.BloomFilter()
		if bloomFilter == nil {
			t.Error(`"last_name" column has no bloom filter despite being configured to have one`)
			return false
		}

		// Every value written to "last_name" must be reported present
		// (bloom filters may false-positive but never false-negative).
		for i, row := range rows {
			if ok, err := bloomFilter.Check(parquet.ValueOf(row.LastName)); err != nil {
				t.Errorf("unexpected error checking bloom filter: %v", err)
				return false
			} else if !ok {
				t.Errorf("bloom filter does not contain value %q of row %d", row.LastName, i)
				return false
			}
		}
		return true
	})
	if err != nil {
		t.Error(err)
	}
}

// TestBloomFilterForDict verifies that a bloom filter configured on a
// dictionary-encoded column ("a,dict") is generated and contains the
// written value.
func TestBloomFilterForDict(t *testing.T) {
	type testStruct struct {
		A string `parquet:"a,dict"`
	}

	schema := parquet.SchemaOf(&testStruct{})

	b := bytes.NewBuffer(nil)
	w := parquet.NewWriter(
		b,
		schema,
		parquet.BloomFilters(parquet.SplitBlockFilter(10, "a")),
	)

	err := w.Write(&testStruct{A: "test"})
	if err != nil {
		t.Fatal(err)
	}
	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len()))
	if err != nil {
		t.Fatal(err)
	}

	ok, err := f.RowGroups()[0].ColumnChunks()[0].BloomFilter().Check(parquet.ValueOf("test"))
	if err != nil {
		t.Fatal(err)
	}
	if !ok {
		t.Error("bloom filter should have contained 'test'")
	}
}

// TestWriterRepeatedUUIDDict writes a repeated, dictionary-encoded UUID
// column and reads it back, checking that the raw 16-byte UUID value
// round-trips unchanged.
func TestWriterRepeatedUUIDDict(t *testing.T) {
	inputID := uuid.MustParse("123456ab-0000-0000-0000-000000000000")
	records := []struct {
		List []uuid.UUID `parquet:"list,dict"`
	}{{
		[]uuid.UUID{inputID},
	}}
	schema := parquet.SchemaOf(&records[0])

	b := bytes.NewBuffer(nil)
	w := parquet.NewWriter(b, schema)
	if err := w.Write(records[0]); err != nil {
		t.Fatal(err)
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len()))
	if err != nil {
		t.Fatal(err)
	}

	rowbuf := make([]parquet.Row, 1)
	rows := f.RowGroups()[0].Rows()
	defer rows.Close()
	n, err := rows.ReadRows(rowbuf)
	if n == 0 {
		t.Fatalf("reading row from parquet file: %v", err)
	}
	if len(rowbuf[0]) != 1 {
		t.Errorf("expected 1 value in row, got %d", len(rowbuf[0]))
	}
	if !bytes.Equal(inputID[:], rowbuf[0][0].Bytes()) {
		t.Errorf("expected to get UUID %q back out, got %q", inputID, rowbuf[0][0].Bytes())
	}
}

// TestWriterResetWithBloomFilters checks that a writer configured with bloom
// filters can be Reset to a new output and successfully write and close a
// second file.
func TestWriterResetWithBloomFilters(t *testing.T) {
	type Test struct {
		Value string `parquet:"value,dict"`
	}

	writer := parquet.NewWriter(new(bytes.Buffer),
		parquet.BloomFilters(
			parquet.SplitBlockFilter(10, "value"),
		),
	)

	if err := writer.Write(&Test{Value: "foo"}); err != nil {
		t.Fatal(err)
	}
	if err := writer.Close(); err != nil {
		t.Fatal(err)
	}

	writer.Reset(new(bytes.Buffer))

	if err := writer.Write(&Test{Value: "bar"}); err != nil {
		t.Fatal(err)
	}
	if err := writer.Close(); err != nil {
		t.Fatal(err)
	}
}

// TestWriterMaxRowsPerRowGroup writes 100 rows with MaxRowsPerRowGroup(10)
// and verifies the resulting file is split into exactly 10 row groups.
func TestWriterMaxRowsPerRowGroup(t *testing.T) {
	output := new(bytes.Buffer)
	writer := parquet.NewWriter(output, parquet.MaxRowsPerRowGroup(10))

	for i := 0; i < 100; i++ {
		err := writer.Write(struct {
			FirstName, LastName string
		}{
			// Vary the first name so rows differ from one another.
			FirstName: "0123456789"[i%10 : i%10+1],
			LastName:  "foo",
		})
		if err != nil {
			t.Fatal(err)
		}
	}

	if err := writer.Close(); err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(output.Bytes()), int64(output.Len()))
	if err != nil {
		t.Fatal(err)
	}

	rowGroups := f.RowGroups()
	if len(rowGroups) != 10 {
		t.Errorf("wrong number of row groups in parquet file: want=10 got=%d", len(rowGroups))
	}
}

// TestSetKeyValueMetadata verifies that metadata set via SetKeyValueMetadata
// after rows have been written is present in the file footer and retrievable
// with Lookup.
func TestSetKeyValueMetadata(t *testing.T) {
	testKey := "test-key"
	testValue := "test-value"

	type testStruct struct {
		A string `parquet:"a,dict"`
	}

	schema := parquet.SchemaOf(&testStruct{})

	b := bytes.NewBuffer(nil)
	w := parquet.NewWriter(
		b,
		schema,
	)

	err := w.Write(&testStruct{A: "test"})
	if err != nil {
		t.Fatal(err)
	}

	// Setting metadata after Write but before Close must still take effect.
	w.SetKeyValueMetadata(testKey, testValue)

	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len()))
	if err != nil {
		t.Fatal(err)
	}

	value, ok := f.Lookup(testKey)
	if !ok {
		t.Fatalf("key/value metadata should have included %q", testKey)
	}
	if value != testValue {
		t.Errorf("expected %q, got %q", testValue, value)
	}
}

// TestSetKeyValueMetadataOverwritesExisting verifies that SetKeyValueMetadata
// replaces a value previously configured for the same key with the
// KeyValueMetadata writer option.
func TestSetKeyValueMetadataOverwritesExisting(t *testing.T) {
	testKey := "test-key"
	testValue := "test-value"

	type testStruct struct {
		A string `parquet:"a,dict"`
	}

	schema := parquet.SchemaOf(&testStruct{})

	b := bytes.NewBuffer(nil)
	w := parquet.NewWriter(
		b,
		schema,
		// Seed the key with a value that SetKeyValueMetadata should overwrite.
		parquet.KeyValueMetadata(testKey, "original-value"),
	)

	err := w.Write(&testStruct{A: "test"})
	if err != nil {
		t.Fatal(err)
	}

	w.SetKeyValueMetadata(testKey, testValue)

	err = w.Close()
	if err != nil {
		t.Fatal(err)
	}

	f, err := parquet.OpenFile(bytes.NewReader(b.Bytes()), int64(b.Len()))
	if err != nil {
		t.Fatal(err)
	}

	value, ok := f.Lookup(testKey)
	if !ok {
		t.Fatalf("key/value metadata should have included %q", testKey)
	}
	if value != testValue {
		t.Errorf("expected %q, got %q", testValue, value)
	}
}