Showing preview only (1,615K chars total). Download the full file or copy to clipboard to get everything.
Repository: segmentio/parquet-go
Branch: main
Commit: 5d42db8f0d47
Files: 321
Total size: 1.5 MB
Directory structure:
gitextract_ou8jci6u/
├── .gitattributes
├── .github/
│ └── workflows/
│ └── test.yml
├── .gitignore
├── .mailmap
├── .words
├── AUTHORS.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── allocator.go
├── array.go
├── array_go18.go
├── bitmap.go
├── bloom/
│ ├── block.go
│ ├── block_amd64.go
│ ├── block_amd64.s
│ ├── block_default.go
│ ├── block_optimized.go
│ ├── block_test.go
│ ├── bloom.go
│ ├── bloom_test.go
│ ├── filter.go
│ ├── filter_amd64.go
│ ├── filter_amd64.s
│ ├── filter_default.go
│ ├── filter_test.go
│ ├── hash.go
│ └── xxhash/
│ ├── LICENSE
│ ├── sum64uint.go
│ ├── sum64uint_amd64.go
│ ├── sum64uint_amd64.s
│ ├── sum64uint_purego.go
│ ├── sum64uint_test.go
│ ├── xxhash.go
│ ├── xxhash_amd64.go
│ ├── xxhash_amd64.s
│ ├── xxhash_purego.go
│ └── xxhash_test.go
├── bloom.go
├── bloom_test.go
├── buffer.go
├── buffer_go18.go
├── buffer_go18_test.go
├── buffer_internal_test.go
├── buffer_pool.go
├── buffer_pool_test.go
├── buffer_test.go
├── column.go
├── column_buffer.go
├── column_buffer_amd64.go
├── column_buffer_amd64.s
├── column_buffer_go18.go
├── column_buffer_purego.go
├── column_buffer_test.go
├── column_chunk.go
├── column_index.go
├── column_index_internal_test.go
├── column_index_test.go
├── column_mapping.go
├── column_mapping_test.go
├── column_path.go
├── column_test.go
├── compare.go
├── compare_test.go
├── compress/
│ ├── brotli/
│ │ └── brotli.go
│ ├── compress.go
│ ├── compress_test.go
│ ├── gzip/
│ │ └── gzip.go
│ ├── lz4/
│ │ └── lz4.go
│ ├── snappy/
│ │ └── snappy.go
│ ├── uncompressed/
│ │ └── uncompressed.go
│ └── zstd/
│ └── zstd.go
├── compress.go
├── config.go
├── convert.go
├── convert_test.go
├── dedupe.go
├── dedupe_test.go
├── deprecated/
│ ├── int96.go
│ ├── int96_test.go
│ └── parquet.go
├── dictionary.go
├── dictionary_amd64.go
├── dictionary_amd64.s
├── dictionary_purego.go
├── dictionary_test.go
├── encoding/
│ ├── bitpacked/
│ │ ├── bitpacked.go
│ │ └── bitpacked_test.go
│ ├── bytestreamsplit/
│ │ ├── bytestreamsplit.go
│ │ ├── bytestreamsplit_amd64.go
│ │ ├── bytestreamsplit_amd64.s
│ │ ├── bytestreamsplit_purego.go
│ │ └── bytestreamsplit_test.go
│ ├── delta/
│ │ ├── binary_packed.go
│ │ ├── binary_packed_amd64.go
│ │ ├── binary_packed_amd64.s
│ │ ├── binary_packed_amd64_test.go
│ │ ├── binary_packed_purego.go
│ │ ├── binary_packed_test.go
│ │ ├── byte_array.go
│ │ ├── byte_array_amd64.go
│ │ ├── byte_array_amd64.s
│ │ ├── byte_array_purego.go
│ │ ├── byte_array_test.go
│ │ ├── delta.go
│ │ ├── delta_amd64.go
│ │ ├── delta_amd64.s
│ │ ├── delta_test.go
│ │ ├── length_byte_array.go
│ │ ├── length_byte_array_amd64.go
│ │ ├── length_byte_array_amd64.s
│ │ ├── length_byte_array_purego.go
│ │ ├── length_byte_array_test.go
│ │ └── testdata/
│ │ └── fuzz/
│ │ └── FuzzDeltaByteArray/
│ │ ├── 2404234dd7e87c04303eb7e58208d5b2ccb04fb616c18f3254e2375c4bc327e3
│ │ ├── 4cf9c92e5a2096e3d6c42eaf9b1e31d2567854d33e06c8d2d7a8c46437345850
│ │ ├── 9b210529f5e34e2dea5824929bf0d8242dc9c3165c0dce10bb376c50e21b38cc
│ │ └── fbe137144bcda3a149c8ea109703f3242192c5480ea1e82dde0ea24e94f3afef
│ ├── encoding.go
│ ├── encoding_test.go
│ ├── fuzz/
│ │ └── fuzz.go
│ ├── notsupported.go
│ ├── plain/
│ │ ├── dictionary.go
│ │ ├── plain.go
│ │ └── plain_test.go
│ ├── rle/
│ │ ├── dictionary.go
│ │ ├── rle.go
│ │ ├── rle_amd64.go
│ │ ├── rle_amd64.s
│ │ ├── rle_amd64_test.go
│ │ ├── rle_purego.go
│ │ ├── rle_test.go
│ │ └── testdata/
│ │ └── fuzz/
│ │ ├── FuzzEncodeBoolean/
│ │ │ ├── 6be5e340694798c2e5b94c758f0262edd2edf8af5795d4c6c60f6e02643bbb96
│ │ │ └── 9772b3f21a6f61810fe38d120bcc9da6d78540f22dc819a4201283608671fdf4
│ │ ├── FuzzEncodeInt32/
│ │ │ └── 06ba4bdb19de593e669c642987e270fe2488d4d58ecd712db136a3e011071253
│ │ └── FuzzEncodeLevels/
│ │ └── 0468684de48f926219bfc47be13ddf085b5a0ed9fbd9c40a005641b253e88d33
│ ├── test/
│ │ ├── test_go17.go
│ │ └── test_go18.go
│ ├── values.go
│ └── values_test.go
├── encoding.go
├── errors.go
├── example_test.go
├── file.go
├── file_test.go
├── filter.go
├── filter_test.go
├── format/
│ ├── parquet.go
│ └── parquet_test.go
├── go.mod
├── go.sum
├── hashprobe/
│ ├── aeshash/
│ │ ├── aeshash.go
│ │ ├── aeshash_amd64.go
│ │ ├── aeshash_amd64.s
│ │ ├── aeshash_purego.go
│ │ └── aeshash_test.go
│ ├── hashprobe.go
│ ├── hashprobe_amd64.go
│ ├── hashprobe_amd64.s
│ ├── hashprobe_purego.go
│ ├── hashprobe_test.go
│ └── wyhash/
│ ├── wyhash.go
│ ├── wyhash_amd64.go
│ ├── wyhash_amd64.s
│ ├── wyhash_purego.go
│ └── wyhash_test.go
├── internal/
│ ├── bitpack/
│ │ ├── bitpack.go
│ │ ├── masks_int32_amd64.s
│ │ ├── pack.go
│ │ ├── unpack.go
│ │ ├── unpack_int32_amd64.go
│ │ ├── unpack_int32_amd64.s
│ │ ├── unpack_int32_purego.go
│ │ ├── unpack_int64_amd64.go
│ │ ├── unpack_int64_amd64.s
│ │ ├── unpack_int64_purego.go
│ │ └── unpack_test.go
│ ├── bytealg/
│ │ ├── broadcast_amd64.go
│ │ ├── broadcast_amd64.s
│ │ ├── broadcast_purego.go
│ │ ├── broadcast_test.go
│ │ ├── bytealg.go
│ │ ├── bytealg_amd64.go
│ │ ├── bytealg_test.go
│ │ ├── count_amd64.go
│ │ ├── count_amd64.s
│ │ ├── count_purego.go
│ │ └── count_test.go
│ ├── debug/
│ │ ├── debug.go
│ │ ├── finalizer_off.go
│ │ └── finalizer_on.go
│ ├── quick/
│ │ └── quick.go
│ └── unsafecast/
│ ├── unsafecast_go17.go
│ ├── unsafecast_go18.go
│ └── unsafecast_go18_test.go
├── level.go
├── limits.go
├── merge.go
├── merge_test.go
├── multi_row_group.go
├── node.go
├── null.go
├── null_amd64.go
├── null_amd64.s
├── null_purego.go
├── null_test.go
├── offset_index.go
├── order.go
├── order_amd64.go
├── order_amd64.s
├── order_purego.go
├── order_test.go
├── page.go
├── page_bounds.go
├── page_bounds_amd64.go
├── page_bounds_amd64.s
├── page_bounds_purego.go
├── page_bounds_test.go
├── page_header.go
├── page_max.go
├── page_max_amd64.go
├── page_max_amd64.s
├── page_max_purego.go
├── page_max_test.go
├── page_min.go
├── page_min_amd64.go
├── page_min_amd64.s
├── page_min_purego.go
├── page_min_test.go
├── page_test.go
├── page_values.go
├── parquet.go
├── parquet_amd64.go
├── parquet_go18.go
├── parquet_go18_test.go
├── parquet_test.go
├── print.go
├── print_test.go
├── reader.go
├── reader_go18.go
├── reader_go18_test.go
├── reader_test.go
├── row.go
├── row_buffer.go
├── row_buffer_test.go
├── row_builder.go
├── row_builder_test.go
├── row_group.go
├── row_group_test.go
├── row_test.go
├── scan.go
├── scan_test.go
├── schema.go
├── schema_test.go
├── search.go
├── search_test.go
├── sorting.go
├── sorting_test.go
├── sparse/
│ ├── array.go
│ ├── gather.go
│ ├── gather_amd64.go
│ ├── gather_amd64.s
│ ├── gather_purego.go
│ ├── gather_test.go
│ └── sparse.go
├── testdata/
│ ├── alltypes_dictionary.parquet
│ ├── alltypes_plain.parquet
│ ├── alltypes_plain.snappy.parquet
│ ├── alltypes_tiny_pages.parquet
│ ├── alltypes_tiny_pages_plain.parquet
│ ├── binary.parquet
│ ├── byte_array_decimal.parquet
│ ├── cluster_test_table_1.snappy.parquet
│ ├── cluster_test_table_2.snappy.parquet
│ ├── covid.snappy.parquet
│ ├── data_index_bloom_encoding_stats.parquet
│ ├── datapage_v2.snappy.parquet
│ ├── delta_binary_packed.parquet
│ ├── delta_byte_array.parquet
│ ├── delta_encoding_optional_column.parquet
│ ├── delta_encoding_required_column.parquet
│ ├── delta_length_byte_array.parquet
│ ├── dict-page-offset-zero.parquet
│ ├── dms_test_table_LOAD00000001.parquet
│ ├── empty.parquet
│ ├── file.parquet
│ ├── fixed_length_decimal.parquet
│ ├── fixed_length_decimal_legacy.parquet
│ ├── int32_decimal.parquet
│ ├── int64_decimal.parquet
│ ├── issue368.parquet
│ ├── list_columns.parquet
│ ├── lz4_raw_compressed.parquet
│ ├── lz4_raw_compressed_larger.parquet
│ ├── nested_lists.snappy.parquet
│ ├── nested_maps.snappy.parquet
│ ├── nested_structs.rust.parquet
│ ├── nonnullable.impala.parquet
│ ├── null_list.parquet
│ ├── nullable.impala.parquet
│ ├── nulls.snappy.parquet
│ ├── repeated_no_annotation.parquet
│ ├── rle_boolean_encoding.parquet
│ ├── single_nan.parquet
│ ├── small.parquet
│ └── trace.snappy.parquet
├── transform.go
├── transform_test.go
├── type.go
├── value.go
├── value_amd64.go
├── value_amd64.s
├── value_go17.go
├── value_go18.go
├── value_test.go
├── values_purego.go
├── writer.go
├── writer_go18.go
├── writer_go18_test.go
└── writer_test.go
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitattributes
================================================
internal/gen-go/* linguist-generated=true
================================================
FILE: .github/workflows/test.yml
================================================
name: Test
on:
push:
branches:
- main
pull_request:
branches:
- '*'
jobs:
test:
strategy:
matrix:
go:
- '1.17.x'
- '1.18.x'
- '1.19.x'
tags:
- ''
- purego
label:
- [self-hosted, linux, arm64, segment]
- ubuntu-latest
runs-on: ${{ matrix.label }}
env:
PARQUETGODEBUG: tracebuf=1
steps:
- uses: actions/checkout@v3
- name: Setup Go ${{ matrix.go }}
uses: actions/setup-go@v3
with:
go-version: ${{ matrix.go }}
- name: Download Dependencies
run: go mod download
- name: Run Tests
run: go test -trimpath -race -tags=${{ matrix.tags }} ./...
- name: Run Benchmarks
run: go test -trimpath -short -tags=${{ matrix.tags }} -run '^$' -bench . -benchtime 1x ./...
format:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Setup Go ${{ matrix.go }}
uses: actions/setup-go@v3
with:
go-version: 1.19.x
- name: Validate formatting
run: make format
# https://github.com/golangci/golangci-lint/issues/2649
# lint:
# runs-on: ubuntu-latest
# steps:
# - uses: actions/checkout@v3
# - uses: actions/setup-go@v3
# with:
# go-version: 1.18.x
# - name: golangci-lint
# uses: golangci/golangci-lint-action@v3
# with:
# version: latest
================================================
FILE: .gitignore
================================================
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Dependency directories (remove the comment below to include it)
# vendor/
# Emacs
*~
#*#
.#
================================================
FILE: .mailmap
================================================
Achille Roussel <achille@segment.com> Achille <achille@segment.com>
Thomas Pelletier <thomas.pelletier@segment.com> Thomas Pelletier <pelletier.thomas@gmail.com>
================================================
FILE: .words
================================================
RowType
Twilio
bottlenecked
decompressors
int96
millis
nanos
reindexing
repositions
schemas
ColumnPages
PageIndex
Zstandard
xxHash
cardinality
enums
32bit
dic
Blart
Versenwald
purego
stdlib
unscaled
cespare
bitset
checksumming
================================================
FILE: AUTHORS.txt
================================================
Achille Roussel <achille@segment.com>
Frederic Branczyk <fbranczyk@gmail.com>
Julien Fabre <julien@segment.com>
Kevin Burke <kevin.burke@segment.com>
Thomas Pelletier <thomas.pelletier@segment.com>
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive criticism
- Focusing on what is best for the community
- Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
- The use of sexualized language or imagery and unwelcome sexual attention or
advances
- Trolling, insulting/derogatory comments, and personal or political attacks
- Public or private harassment
- Publishing others' private information, such as a physical or electronic
address, without explicit permission
- Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at open-source@twilio.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
[homepage]: https://www.contributor-covenant.org
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to segmentio/parquet
## Code of Conduct
Help us keep the project open and inclusive. Please be kind to and
considerate of other developers, as we all have the same goal: make
the project as good as it can be.
* [Code of Conduct](./CODE_OF_CONDUCT.md)
## Licensing
All third party contributors acknowledge that any contributions they provide
will be made under the same open source license that the open source project
is provided under.
## Contributing
* Open an Issue to report bugs or discuss non-trivial changes.
* Open a Pull Request to submit a code change for review.
### Coding Rules
To ensure consistency throughout the source code, keep these rules in mind
when submitting contributions:
* All features or bug fixes must be tested by one or more tests.
* All exported types, functions, and symbols must be documented.
* All code must be formatted with `go fmt`.
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--------------------------------------------------------------------------------
This product includes code from Apache Parquet.
* deprecated/parquet.go is based on Apache Parquet's thrift file
* format/parquet.go is based on Apache Parquet's thrift file
Copyright: 2014 The Apache Software Foundation.
Home page: https://github.com/apache/parquet-format
License: http://www.apache.org/licenses/LICENSE-2.0
================================================
FILE: Makefile
================================================
.PHONY: format
AUTHORS.txt: .mailmap
go install github.com/kevinburke/write_mailmap@latest
write_mailmap > AUTHORS.txt
format:
go install github.com/kevinburke/differ@latest
differ gofmt -w .
test:
go test -v -trimpath -race -tags= ./...
================================================
FILE: README.md
================================================
# Project has been Archived
Development has moved to https://github.com/parquet-go/parquet-go. No APIs have
changed; we just decided to create a new organization for this library. Thank
you to all of the contributors for your hard work.
# segmentio/parquet-go
High-performance Go library to manipulate parquet files.
================================================
FILE: allocator.go
================================================
package parquet
import "github.com/segmentio/parquet-go/internal/unsafecast"
// allocator is a simple arena: it hands out sub-slices of a growing
// backing buffer so that many small byte allocations share one array.
type allocator struct{ buffer []byte }

// makeBytes returns a zeroed slice of n bytes carved out of the arena.
//
// When the current backing buffer cannot hold n more bytes, a larger
// one is allocated: at least double the previous capacity, at least
// 4096 bytes, and doubled until it fits n. Slices previously returned
// remain valid because they keep referencing the old backing array.
// The returned slice has capacity exactly n so callers cannot append
// into the arena's remaining space.
func (a *allocator) makeBytes(n int) []byte {
	if cap(a.buffer)-len(a.buffer) < n {
		newCap := 2 * cap(a.buffer)
		if newCap == 0 {
			newCap = 4096
		}
		for newCap < n {
			newCap *= 2
		}
		a.buffer = make([]byte, 0, newCap)
	}
	start := len(a.buffer)
	end := start + n
	a.buffer = a.buffer[:end]
	return a.buffer[start:end:end]
}
// copyBytes stores a copy of v in the arena and returns the copy.
func (a *allocator) copyBytes(v []byte) []byte {
	out := a.makeBytes(len(v))
	copy(out, v)
	return out
}
// copyString stores a copy of v in the arena and returns it as a
// string, reinterpreting the arena bytes in place to avoid a second
// allocation.
func (a *allocator) copyString(v string) string {
	out := a.makeBytes(len(v))
	copy(out, v)
	return unsafecast.BytesToString(out)
}
// reset discards all prior allocations while retaining the backing
// buffer for reuse by subsequent makeBytes calls. Slices handed out
// before reset must no longer be used, since their memory will be
// handed out again.
func (a *allocator) reset() {
	a.buffer = a.buffer[:0]
}
// rowAllocator is a memory allocator used to make a copy of rows referencing
// memory buffers that parquet-go does not have ownership of.
//
// This type is used in the implementation of various readers and writers that
// need to capture rows passed to the ReadRows/WriteRows methods. Copies to a
// local buffer is necessary in those cases to respect the reader/writer
// contracts that do not allow the implementations to retain the rows they
// are passed as arguments.
//
// See: RowBuffer, DedupeRowReader, DedupeRowWriter
type rowAllocator struct{ allocator }

// capture rewrites every byte-array value of row so that it points into
// memory owned by the allocator, detaching the row from buffers the
// caller may reuse after this call returns.
func (a *rowAllocator) capture(row Row) {
	for i := range row {
		if k := row[i].Kind(); k == ByteArray || k == FixedLenByteArray {
			row[i].ptr = unsafecast.AddressOfBytes(a.copyBytes(row[i].byteArray()))
		}
	}
}
================================================
FILE: array.go
================================================
package parquet
import (
"unsafe"
"github.com/segmentio/parquet-go/sparse"
)
// makeArrayValue views a []Value as a sparse.Array whose elements start
// at the given byte offset inside each Value, stepping by the size of a
// Value between elements.
func makeArrayValue(values []Value, offset uintptr) sparse.Array {
	base := *(*unsafe.Pointer)(unsafe.Pointer(&values))
	return sparse.UnsafeArray(unsafe.Add(base, offset), len(values), unsafe.Sizeof(Value{}))
}
// makeArrayString views a []string as a sparse.Array of string headers.
func makeArrayString(values []string) sparse.Array {
	var model string
	base := *(*unsafe.Pointer)(unsafe.Pointer(&values))
	return sparse.UnsafeArray(base, len(values), unsafe.Sizeof(model))
}
// makeArrayBE128 views a slice of pointers to 16 byte values as a
// sparse.Array of those pointers.
func makeArrayBE128(values []*[16]byte) sparse.Array {
	base := *(*unsafe.Pointer)(unsafe.Pointer(&values))
	return sparse.UnsafeArray(base, len(values), unsafe.Sizeof((*[16]byte)(nil)))
}
================================================
FILE: array_go18.go
================================================
//go:build go1.18
package parquet
import (
"unsafe"
"github.com/segmentio/parquet-go/internal/unsafecast"
"github.com/segmentio/parquet-go/sparse"
)
// makeArray wraps a raw base pointer, element count, and byte stride
// into a sparse.Array.
func makeArray(base unsafe.Pointer, length int, offset uintptr) sparse.Array {
	return sparse.UnsafeArray(base, length, offset)
}
// makeArrayOf returns a sparse.Array covering the elements of s, using
// the size of T as the stride between elements.
func makeArrayOf[T any](s []T) sparse.Array {
	var zero T
	return makeArray(unsafecast.PointerOf(s), len(s), unsafe.Sizeof(zero))
}
// makeSlice returns a []T aliasing the memory spanned by a.
func makeSlice[T any](a sparse.Array) []T {
	base := a.Index(0)
	length := a.Len()
	return slice[T](base, length)
}
// slice constructs a []T of length and capacity n whose backing array
// starts at p.
func slice[T any](p unsafe.Pointer, n int) []T {
	return unsafe.Slice((*T)(p), n)
}
// sliceHeader mirrors the runtime layout of a Go slice header; it is
// used to build slices from raw pointers without extra allocations.
type sliceHeader struct {
	base unsafe.Pointer
	len  int
	cap  int
}
================================================
FILE: bitmap.go
================================================
package parquet
import "sync"
// bitmap is a simple bitset backed by 64 bit words.
type bitmap struct {
	bits []uint64
}

// reset resizes the bitmap to hold at least size bits and clears them
// all. The backing slice is reused when it has enough capacity.
func (m *bitmap) reset(size int) {
	// Number of 64 bit words needed to hold size bits.
	size = (size + 63) / 64
	if cap(m.bits) < size {
		m.bits = make([]uint64, size, 2*size)
	} else {
		m.bits = m.bits[:size]
		m.clear()
	}
}

// clear zeroes every word of the bitmap.
func (m *bitmap) clear() {
	for i := range m.bits {
		m.bits[i] = 0
	}
}

var (
	bitmapPool sync.Pool // *bitmap
)

// acquireBitmap returns a zeroed bitmap able to hold n bits, either
// recycled from the pool or newly allocated.
func acquireBitmap(n int) *bitmap {
	b, _ := bitmapPool.Get().(*bitmap)
	if b == nil {
		b = new(bitmap)
	}
	// Always size through reset so a fresh bitmap has the same word
	// count as one recycled from the pool. Previously the fresh path
	// allocated n words for n bits, 64x more memory than needed and a
	// different length than the pooled path produced.
	b.reset(n)
	return b
}

// releaseBitmap returns b to the pool for reuse; b must not be used
// after this call.
func releaseBitmap(b *bitmap) {
	if b != nil {
		bitmapPool.Put(b)
	}
}
================================================
FILE: bloom/block.go
================================================
package bloom
import "unsafe"
// Word represents 32 bits words of bloom filter blocks.
type Word uint32

// Block represents bloom filter blocks which contain eight 32 bits words.
type Block [8]Word

// Bytes returns b as a byte slice.
//
// The returned slice aliases the block's memory: it is BlockSize bytes
// long and writing to it mutates the block. The byte order follows the
// host's in-memory layout of the 32 bit words.
func (b *Block) Bytes() []byte {
	return unsafe.Slice((*byte)(unsafe.Pointer(b)), BlockSize)
}

const (
	// BlockSize is the size of bloom filter blocks in bytes.
	BlockSize = 32

	// Per-word salt constants of the split-block bloom filter; these
	// values match the salts mandated by the parquet format
	// specification (SBBF).
	salt0 = 0x47b6137b
	salt1 = 0x44974d91
	salt2 = 0x8824ad5b
	salt3 = 0xa2b7289d
	salt4 = 0x705495c7
	salt5 = 0x2df1424b
	salt6 = 0x9efc4947
	salt7 = 0x5c6bfb31
)
================================================
FILE: bloom/block_amd64.go
================================================
//go:build !purego
package bloom
import "golang.org/x/sys/cpu"
// The functions in this file are SIMD-optimized versions of the functions
// declared in block_optimized.go for x86 targets.
//
// The optimization yields measurable improvements over the pure Go versions:
//
// goos: darwin
// goarch: amd64
// pkg: github.com/segmentio/parquet-go/bloom
// cpu: Intel(R) Core(TM) i9-8950HK CPU @ 2.90GHz
//
// name old time/op new time/op delta
// BlockInsert 11.6ns ± 4% 2.0ns ± 3% -82.37% (p=0.000 n=8+8)
// BlockCheck 12.6ns ±28% 2.1ns ± 4% -83.12% (p=0.000 n=10+8)
//
// name old speed new speed delta
// BlockInsert 2.73GB/s ±13% 15.70GB/s ± 3% +475.96% (p=0.000 n=9+8)
// BlockCheck 2.59GB/s ±23% 15.06GB/s ± 4% +482.25% (p=0.000 n=10+8)
//
// Note that the numbers above are a comparison to the routines implemented in
// block_optimized.go; the delta comparing to functions in block_default.go is
// significantly larger but not very interesting since those functions have no
// practical use cases.
// hasAVX2 records at program start whether the CPU supports AVX2; the
// assembly routines test it to pick between the vectorized and scalar paths.
var hasAVX2 = cpu.X86.HasAVX2

// blockInsert sets in b the eight bits selected by hashing x with the salts.
// Implemented in block_amd64.s.
//go:noescape
func blockInsert(b *Block, x uint32)

// blockCheck reports whether all eight bits selected by hashing x with the
// salts are set in b. Implemented in block_amd64.s.
//go:noescape
func blockCheck(b *Block, x uint32) bool

// Insert adds x to the block.
func (b *Block) Insert(x uint32) { blockInsert(b, x) }

// Check probes the block for x; false positives are possible, false negatives
// are not.
func (b *Block) Check(x uint32) bool { return blockCheck(b, x) }
================================================
FILE: bloom/block_amd64.s
================================================
//go:build !purego
#include "textflag.h"
// Per-word multiplicative salts of the split block bloom filter, matching the
// salt0..salt7 constants declared in block.go.
#define salt0 0x47b6137b
#define salt1 0x44974d91
#define salt2 0x8824ad5b
#define salt3 0xa2b7289d
#define salt4 0x705495c7
#define salt5 0x2df1424b
#define salt6 0x9efc4947
#define salt7 0x5c6bfb31

// ones is a vector of eight 32-bit 1s used as the value shifted by VPSLLVD
// when building the block mask.
DATA ones+0(SB)/4, $1
DATA ones+4(SB)/4, $1
DATA ones+8(SB)/4, $1
DATA ones+12(SB)/4, $1
DATA ones+16(SB)/4, $1
DATA ones+20(SB)/4, $1
DATA ones+24(SB)/4, $1
DATA ones+28(SB)/4, $1
GLOBL ones(SB), RODATA|NOPTR, $32

// salt packs the eight salts into one vector for the SIMD mask computation.
DATA salt+0(SB)/4, $salt0
DATA salt+4(SB)/4, $salt1
DATA salt+8(SB)/4, $salt2
DATA salt+12(SB)/4, $salt3
DATA salt+16(SB)/4, $salt4
DATA salt+20(SB)/4, $salt5
DATA salt+24(SB)/4, $salt6
DATA salt+28(SB)/4, $salt7
GLOBL salt(SB), RODATA|NOPTR, $32

// This initial block is a SIMD implementation of the mask function declared in
// block_default.go and block_optimized.go. For each of the 8 x 32 bits words of
// the bloom filter block, the operation performed is:
//
//	block[i] = 1 << ((x * salt[i]) >> 27)
//
// Arguments
// ---------
//
// * src is a memory location where the value to use when computing the mask is
//   located. The memory location is not modified.
//
// * tmp is a YMM register used as scratch space to hold intermediary results in
//   the algorithm.
//
// * dst is a YMM register where the final mask is written.
//
#define generateMask(src, tmp, dst) \
	VMOVDQA ones(SB), dst \
	VPBROADCASTD src, tmp \
	VPMULLD salt(SB), tmp, tmp \
	VPSRLD $27, tmp, tmp \
	VPSLLVD tmp, dst, dst

// insert computes 1 << ((src * salt) >> 27) and ORs it into the 32-bit word
// at dst; clobbers CX and DX.
#define insert(salt, src, dst) \
	MOVL src, CX \
	IMULL salt, CX \
	SHRL $27, CX \
	MOVL $1, DX \
	SHLL CX, DX \
	ORL DX, dst

// check tests bit ((x * salt) >> 27) of the word b and jumps to the notfound
// label when it is clear; clobbers CX and DX.
#define check(salt, b, x) \
	MOVL b, CX \
	MOVL x, DX \
	IMULL salt, DX \
	SHRL $27, DX \
	BTL DX, CX \
	JAE notfound
// func blockInsert(b *Block, x uint32)
//
// Sets in *b the eight bits selected by multiplying x with each salt and
// keeping the top 5 bits of each product.
TEXT ·blockInsert(SB), NOSPLIT, $0-16
	MOVQ b+0(FP), AX
	CMPB ·hasAVX2(SB), $0
	JE   fallback
avx2:
	generateMask(x+8(FP), Y1, Y0)
	// Set all 1 bits of the mask in the bloom filter block.
	VPOR    (AX), Y0, Y0
	VMOVDQU Y0, (AX)
	VZEROUPPER
	RET
fallback:
	// Scalar path used when the CPU does not support AVX2: set the salted bit
	// of each of the eight 32-bit words in turn.
	MOVL x+8(FP), BX
	insert($salt0, BX, 0(AX))
	insert($salt1, BX, 4(AX))
	insert($salt2, BX, 8(AX))
	insert($salt3, BX, 12(AX))
	insert($salt4, BX, 16(AX))
	insert($salt5, BX, 20(AX))
	insert($salt6, BX, 24(AX))
	insert($salt7, BX, 28(AX))
	RET
// func blockCheck(b *Block, x uint32) bool
//
// Reports whether all eight bits selected by hashing x with the salts are set
// in *b.
TEXT ·blockCheck(SB), NOSPLIT, $0-17
	MOVQ b+0(FP), AX
	CMPB ·hasAVX2(SB), $0
	JE   fallback
avx2:
	generateMask(x+8(FP), Y1, Y0)
	// Compare the 1 bits of the mask with the bloom filter block, then compare
	// the result with the mask, expecting equality if the value `x` was present
	// in the block.
	VPAND  (AX), Y0, Y1 // Y1 = block & mask
	VPTEST Y0, Y1       // CF = 1 if (Y0 &^ Y1) == 0, i.e. every mask bit is set in the block
	SETCS  ret+16(FP)   // return CF == 1
	VZEROUPPER
	RET
fallback:
	// Scalar path: each check macro jumps to notfound as soon as one of the
	// eight salted bits is missing.
	MOVL x+8(FP), BX
	check($salt0, 0(AX), BX)
	check($salt1, 4(AX), BX)
	check($salt2, 8(AX), BX)
	check($salt3, 12(AX), BX)
	check($salt4, 16(AX), BX)
	check($salt5, 20(AX), BX)
	check($salt6, 24(AX), BX)
	check($salt7, 28(AX), BX)
	MOVB $1, CX
	JMP  done
notfound:
	XORB CX, CX
done:
	MOVB CX, ret+16(FP)
	RET
================================================
FILE: bloom/block_default.go
================================================
//go:build purego && parquet.bloom.no_unroll
package bloom
// This file contains direct translation of the algorithms described in the
// parquet bloom filter spec:
// https://github.com/apache/parquet-format/blob/master/BloomFilter.md
//
// There are no practical reasons to enable the parquet.bloom.no_unroll build
// tag, the code is left here as a reference to ensure that the optimized
// implementations of block operations behave the same as the functions in this
// file.
// salt lists the eight 32-bit multiplicative constants of the split block
// bloom filter, one per word of a block, as defined by the parquet bloom
// filter specification referenced in the file header.
var salt = [8]uint32{
	0: salt0,
	1: salt1,
	2: salt2,
	3: salt3,
	4: salt4,
	5: salt5,
	6: salt6,
	7: salt7,
}
// set turns on bit i of the word.
func (w *Word) set(i uint) {
	*w |= 1 << i
}

// has reports whether bit i of the word is set.
func (w Word) has(i uint) bool {
	return (w>>i)&1 != 0
}
// mask computes the probe mask of x: one bit set per word of the block,
// selected by multiplying x with the word's salt and keeping the top 5 bits
// of the 32-bit product.
func mask(x uint32) Block {
	var b Block
	for i, s := range salt {
		b[i].set(uint(x*s) >> 27)
	}
	return b
}
// Insert adds x to the block by setting every bit of its mask.
func (b *Block) Insert(x uint32) {
	m := mask(x)
	for i := range m {
		b[i] |= m[i]
	}
}
// Check reports whether every bit of the mask of x is set in the block:
// true means x may have been inserted, false means it definitely was not.
func (b *Block) Check(x uint32) bool {
	m := mask(x)
	for i := range m {
		if b[i]&m[i] != m[i] {
			return false
		}
	}
	return true
}
================================================
FILE: bloom/block_optimized.go
================================================
//go:build (!amd64 || purego) && !parquet.bloom.no_unroll
package bloom
// The functions in this file are optimized versions of the algorithms described
// in https://github.com/apache/parquet-format/blob/master/BloomFilter.md
//
// The functions are manual unrolling of the loops, which yield significant
// performance improvements:
//
// goos: darwin
// goarch: amd64
// pkg: github.com/segmentio/parquet-go/bloom
// cpu: Intel(R) Core(TM) i9-8950HK CPU @ 2.90GHz
//
// name old time/op new time/op delta
// BlockInsert 327ns ± 1% 12ns ± 4% -96.47% (p=0.000 n=9+8)
// BlockCheck 240ns ± 4% 13ns ±28% -94.75% (p=0.000 n=8+10)
//
// name old speed new speed delta
// BlockInsert 97.8MB/s ± 1% 2725.0MB/s ±13% +2686.59% (p=0.000 n=9+9)
// BlockCheck 133MB/s ± 4% 2587MB/s ±23% +1838.46% (p=0.000 n=8+10)
//
// The benchmarks measure throughput based on the byte size of a bloom filter
// block.
// Insert sets in b the eight bits derived from x, one per 32-bit word. The
// loop is unrolled by hand on purpose; see the benchmark numbers in the
// comment at the top of this file.
func (b *Block) Insert(x uint32) {
	i0 := (x * salt0) >> 27
	i1 := (x * salt1) >> 27
	i2 := (x * salt2) >> 27
	i3 := (x * salt3) >> 27
	i4 := (x * salt4) >> 27
	i5 := (x * salt5) >> 27
	i6 := (x * salt6) >> 27
	i7 := (x * salt7) >> 27
	b[0] |= 1 << i0
	b[1] |= 1 << i1
	b[2] |= 1 << i2
	b[3] |= 1 << i3
	b[4] |= 1 << i4
	b[5] |= 1 << i5
	b[6] |= 1 << i6
	b[7] |= 1 << i7
}
// Check reports whether all eight bits derived from x are set in b,
// returning false at the first missing bit. The unrolled form mirrors
// Insert above; see the file header for benchmark numbers.
func (b *Block) Check(x uint32) bool {
	if b[0]&(1<<((x*salt0)>>27)) == 0 {
		return false
	}
	if b[1]&(1<<((x*salt1)>>27)) == 0 {
		return false
	}
	if b[2]&(1<<((x*salt2)>>27)) == 0 {
		return false
	}
	if b[3]&(1<<((x*salt3)>>27)) == 0 {
		return false
	}
	if b[4]&(1<<((x*salt4)>>27)) == 0 {
		return false
	}
	if b[5]&(1<<((x*salt5)>>27)) == 0 {
		return false
	}
	if b[6]&(1<<((x*salt6)>>27)) == 0 {
		return false
	}
	return b[7]&(1<<((x*salt7)>>27)) != 0
}
// insertBulk adds every hash of x to the filter, one value at a time.
func (f SplitBlockFilter) insertBulk(x []uint64) {
	for _, h := range x {
		f.Insert(h)
	}
}
================================================
FILE: bloom/block_test.go
================================================
package bloom_test
import (
"math"
"testing"
"github.com/segmentio/parquet-go/bloom"
)
// TestBlock inserts a spread of 32-bit values one at a time and verifies that
// each inserted value is found while neighboring and complemented values are
// not. The Fatalf calls for the x-1 and x+1 probes previously reported ^x
// instead of the value actually probed, which made failure output misleading;
// they now report x-1 and x+1 respectively.
func TestBlock(t *testing.T) {
	for i := uint64(0); i < math.MaxUint32; i = (i * 2) + 1 {
		x := uint32(i)
		b := bloom.Block{}
		b.Insert(x)
		if !b.Check(x) {
			t.Fatalf("bloom filter block does not contain the value that was inserted: %d", x)
		}
		if b.Check(x - 1) {
			t.Fatalf("bloom filter block contains value that was not inserted: %d", x-1)
		}
		if b.Check(x + 1) {
			t.Fatalf("bloom filter block contains value that was not inserted: %d", x+1)
		}
		if b.Check(^x) {
			t.Fatalf("bloom filter block contains value that was not inserted: %d", ^x)
		}
	}
}
// BenchmarkBlockInsert measures single-block insert throughput, reported in
// bytes using the block size.
func BenchmarkBlockInsert(b *testing.B) {
	blk := bloom.Block{}
	for i := 0; i < b.N; i++ {
		blk.Insert(uint32(i))
	}
	b.SetBytes(bloom.BlockSize)
}

// BenchmarkBlockCheck measures single-block lookup throughput on a block
// containing one value.
func BenchmarkBlockCheck(b *testing.B) {
	blk := bloom.Block{}
	blk.Insert(42)
	for i := 0; i < b.N; i++ {
		blk.Check(42)
	}
	b.SetBytes(bloom.BlockSize)
}
================================================
FILE: bloom/bloom.go
================================================
// Package bloom implements parquet bloom filters.
package bloom
// fasthash1x64 maps the upper 32 bits of value onto the range [0, scale)
// with a multiply-shift, producing the block index of a hash.
func fasthash1x64(value uint64, scale int32) uint64 {
	hi := value >> 32
	return (hi * uint64(scale)) >> 32
}
// fasthash4x64 applies the fasthash1x64 multiply-shift to four values at
// once, writing each result into the matching slot of dst.
func fasthash4x64(dst, src *[4]uint64, scale int32) {
	s := uint64(scale)
	for i := range dst {
		dst[i] = ((src[i] >> 32) * s) >> 32
	}
}
================================================
FILE: bloom/bloom_test.go
================================================
package bloom
import (
"math/rand"
"testing"
)
// Test file for internal functions of the bloom package.
// global4x64 is a sink for benchmark results so the compiler cannot
// eliminate the benchmarked computation.
var global4x64 [4]uint64

// TestFasthash pins the output of fasthash4x64 on a fixed pseudo-random
// input so regressions in the multiply-shift hash are caught.
func TestFasthash(t *testing.T) {
	src64 := rand.NewSource(0).(rand.Source64)
	input := [4]uint64{src64.Uint64(), src64.Uint64(), src64.Uint64(), src64.Uint64()}
	output := [4]uint64{}
	expect := [4]uint64{483, 125, 335, 539}
	fasthash4x64(&output, &input, 1024)
	if output != expect {
		t.Errorf("got=%v want=%v", output, expect)
	}
}
// BenchmarkFasthash measures the throughput of the 4-way multiply-shift
// hash, reporting 32 bytes processed per call (4 x 8-byte inputs).
func BenchmarkFasthash(b *testing.B) {
	var src, dst [4]uint64
	for i := 0; i < b.N; i++ {
		fasthash4x64(&dst, &src, 1024)
	}
	b.SetBytes(32)
	global4x64 = dst // use it so the loop isn't optimized away
}
================================================
FILE: bloom/filter.go
================================================
package bloom
import (
"io"
"sync"
"unsafe"
)
// Filter is an interface representing read-only bloom filters where programs
// can probe for the possible presence of a hash key.
type Filter interface {
	// Check reports whether the key may be present in the filter; false
	// positives are possible, false negatives are not.
	Check(uint64) bool
}

// SplitBlockFilter is an in-memory implementation of the parquet bloom filters.
//
// This type is useful to construct bloom filters that are later serialized
// to a storage medium.
type SplitBlockFilter []Block

// MakeSplitBlockFilter constructs a SplitBlockFilter value from the data byte
// slice.
//
// The returned filter shares the memory of data: the byte slice is
// reinterpreted in place as a slice of Block values, and any trailing bytes
// that do not form a whole block are excluded from the filter.
func MakeSplitBlockFilter(data []byte) SplitBlockFilter {
	// Read the data pointer out of the slice header so the backing array can
	// be viewed as []Block without copying.
	p := *(*unsafe.Pointer)(unsafe.Pointer(&data))
	n := len(data) / BlockSize
	return unsafe.Slice((*Block)(p), n)
}
// NumSplitBlocksOf returns the number of blocks in a filter intended to hold
// the given number of values and bits of filter per value.
//
// This function is useful to determine the number of blocks when creating bloom
// filters in memory, for example:
//
//	f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(n, 10))
func NumSplitBlocksOf(numValues int64, bitsPerValue uint) int {
	totalBits := uint(numValues) * bitsPerValue
	totalBytes := (totalBits + 7) / 8
	return int((totalBytes + BlockSize - 1) / BlockSize)
}
// Reset clears the content of the filter f, zeroing every block in place.
func (f SplitBlockFilter) Reset() {
	var zero Block
	for i := range f {
		f[i] = zero
	}
}
// Block returns a pointer to the block that the given value hashes to in the
// bloom filter.
func (f SplitBlockFilter) Block(x uint64) *Block { return &f[fasthash1x64(x, int32(len(f)))] }

// InsertBulk adds all values from x into f.
func (f SplitBlockFilter) InsertBulk(x []uint64) { filterInsertBulk(f, x) }

// Insert adds x to f.
func (f SplitBlockFilter) Insert(x uint64) { filterInsert(f, x) }

// Check tests whether x is in f: true means x may be present, false means it
// definitely is not.
func (f SplitBlockFilter) Check(x uint64) bool { return filterCheck(f, x) }
// Bytes converts f to a byte slice.
//
// The returned slice shares the memory of f. The method is intended to be used
// to serialize the bloom filter to a storage medium.
func (f SplitBlockFilter) Bytes() []byte {
	// Reinterpret the filter's data pointer as *byte and build a slice of
	// len(f)*BlockSize bytes over the same memory.
	return unsafe.Slice(*(**byte)(unsafe.Pointer(&f)), len(f)*BlockSize)
}
// CheckSplitBlock is similar to bloom.SplitBlockFilter.Check but reads the
// bloom filter of n bytes from r.
//
// The size n of the bloom filter is assumed to be a multiple of the block size.
//
// NOTE(review): the check runs on whatever the pooled block holds after
// ReadAt returns; when ReadAt fails the boolean result is computed from stale
// or partial data, so callers must inspect the returned error first.
func CheckSplitBlock(r io.ReaderAt, n int64, x uint64) (bool, error) {
	block := acquireBlock()
	defer releaseBlock(block)
	// Locate the block that x hashes to, as a byte offset into r.
	offset := BlockSize * fasthash1x64(x, int32(n/BlockSize))
	_, err := r.ReadAt(block.Bytes(), int64(offset))
	return block.Check(uint32(x)), err
}
var blockPool sync.Pool // pool of *Block values reused by CheckSplitBlock

// acquireBlock returns a *Block from the pool, allocating a fresh one when
// the pool is empty. Pooled blocks are NOT zeroed before being returned.
func acquireBlock() *Block {
	if b, ok := blockPool.Get().(*Block); ok && b != nil {
		return b
	}
	return new(Block)
}

// releaseBlock returns b to the pool for later reuse; nil is ignored.
func releaseBlock(b *Block) {
	if b == nil {
		return
	}
	blockPool.Put(b)
}
================================================
FILE: bloom/filter_amd64.go
================================================
//go:build !purego
package bloom
// This file contains the signatures for bloom filter algorithms implemented in
// filter_amd64.s.
//
// The assembly code provides significant speedups on filter inserts and checks,
// with the greatest gains seen on the bulk insert operation where the use of
// vectorized code yields great results.
//
// The following sections record the kind of performance improvements we were
// able to measure, comparing with performing the filter block lookups in Go
// and calling to the block insert and check routines:
//
// name old time/op new time/op delta
// FilterInsertBulk 45.1ns ± 2% 17.8ns ± 3% -60.41% (p=0.000 n=10+10)
// FilterInsert 3.48ns ± 2% 2.55ns ± 1% -26.86% (p=0.000 n=10+8)
// FilterCheck 3.64ns ± 3% 2.66ns ± 2% -26.82% (p=0.000 n=10+9)
//
// name old speed new speed delta
// FilterInsertBulk 11.4GB/s ± 2% 28.7GB/s ± 3% +152.61% (p=0.000 n=10+10)
// FilterInsert 9.19GB/s ± 2% 12.56GB/s ± 1% +36.71% (p=0.000 n=10+8)
// FilterCheck 8.80GB/s ± 3% 12.03GB/s ± 2% +36.61% (p=0.000 n=10+9)
// filterInsertBulk inserts every hash of x into the filter f; implemented in
// filter_amd64.s.
//go:noescape
func filterInsertBulk(f []Block, x []uint64)

// filterInsert adds the hash x to the block it maps to in f; implemented in
// filter_amd64.s.
//go:noescape
func filterInsert(f []Block, x uint64)

// filterCheck reports whether the hash x may have been inserted in f;
// implemented in filter_amd64.s.
//go:noescape
func filterCheck(f []Block, x uint64) bool
================================================
FILE: bloom/filter_amd64.s
================================================
//go:build !purego
#include "textflag.h"
// Per-word multiplicative salts, matching the salt0..salt7 constants in
// block.go.
#define salt0 0x47b6137b
#define salt1 0x44974d91
#define salt2 0x8824ad5b
#define salt3 0xa2b7289d
#define salt4 0x705495c7
#define salt5 0x2df1424b
#define salt6 0x9efc4947
#define salt7 0x5c6bfb31

// See block_amd64.s for a description of this algorithm. This variant of
// generateMask uses src both as input and scratch space, so the register
// holding src is clobbered.
#define generateMask(src, dst) \
	VMOVDQA ones(SB), dst \
	VPMULLD salt(SB), src, src \
	VPSRLD $27, src, src \
	VPSLLVD src, dst, dst

// applyMask ORs the mask in src into the filter block at memory location dst
// and stores the result back; src is clobbered.
#define applyMask(src, dst) \
	VPOR dst, src, src \
	VMOVDQU src, dst

// fasthash1x64 computes value = (((value >> 32) * scale) >> 32) << 5; the
// final shift by 5 converts the block index into a byte offset, blocks being
// 32 bytes each.
#define fasthash1x64(scale, value) \
	SHRQ $32, value \
	IMULQ scale, value \
	SHRQ $32, value \
	SHLQ $5, value

// fasthash4x64 is the 4-lane SIMD version of fasthash1x64.
#define fasthash4x64(scale, value) \
	VPSRLQ $32, value, value \
	VPMULUDQ scale, value, value \
	VPSRLQ $32, value, value \
	VPSLLQ $5, value, value

// extract4x64 moves the four 64-bit lanes of srcYMM into the general purpose
// registers r0..r3, using tmpXMM as scratch for the upper half.
#define extract4x64(srcYMM, srcXMM, tmpXMM, r0, r1, r2, r3) \
	VEXTRACTI128 $1, srcYMM, tmpXMM \
	MOVQ srcXMM, r0 \
	VPEXTRQ $1, srcXMM, r1 \
	MOVQ tmpXMM, r2 \
	VPEXTRQ $1, tmpXMM, r3

// insert computes 1 << ((src * salt) >> 27) and ORs it into the 32-bit word
// at dst; clobbers CX and DX.
#define insert(salt, src, dst) \
	MOVL src, CX \
	IMULL salt, CX \
	SHRL $27, CX \
	MOVL $1, DX \
	SHLL CX, DX \
	ORL DX, dst

// check tests bit ((x * salt) >> 27) of the word b and jumps to the notfound
// label when it is clear; clobbers CX and DX.
#define check(salt, b, x) \
	MOVL b, CX \
	MOVL x, DX \
	IMULL salt, DX \
	SHRL $27, DX \
	BTL DX, CX \
	JAE notfound
// func filterInsertBulk(f []Block, x []uint64)
//
// Inserts every hash in x into the split block filter f. When AVX2 is
// available the main loop handles 4 hashes per iteration; a scalar loop
// processes the remaining tail and serves as the non-AVX2 fallback.
TEXT ·filterInsertBulk(SB), NOSPLIT, $0-48
	MOVQ f_base+0(FP), AX
	MOVQ f_len+8(FP), CX
	MOVQ x_base+24(FP), BX
	MOVQ x_len+32(FP), DX
	CMPB ·hasAVX2(SB), $0
	JE   fallback
avx2:
	// f_base+8(FP) addresses the length field of the f slice header (the
	// same word loaded into CX above); broadcast it to every lane to use as
	// the scale of fasthash4x64.
	VPBROADCASTQ f_base+8(FP), Y0
	// Loop initialization, SI holds the current index in `x`, DI is the number
	// of elements in `x` rounded down to the nearest multiple of 4.
	XORQ SI, SI
	MOVQ DX, DI
	SHRQ $2, DI
	SHLQ $2, DI
avx2loop4x64:
	CMPQ SI, DI
	JAE  avx2loop1x64
	// The masks and indexes for 4 input hashes are computed in each loop
	// iteration. The hashes are loaded in Y1 so we can use vector instructions
	// to compute all 4 indexes in parallel. The lower 32 bits of the hashes are
	// also broadcasted in 4 YMM registers to compute the 4 masks that will then
	// be applied to the filter.
	VMOVDQU      (BX)(SI*8), Y1
	VPBROADCASTD 0(BX)(SI*8), Y2
	VPBROADCASTD 8(BX)(SI*8), Y3
	VPBROADCASTD 16(BX)(SI*8), Y4
	VPBROADCASTD 24(BX)(SI*8), Y5
	fasthash4x64(Y0, Y1)
	generateMask(Y2, Y6)
	generateMask(Y3, Y7)
	generateMask(Y4, Y8)
	generateMask(Y5, Y9)
	// The next block of instructions move indexes from the vector to general
	// purpose registers in order to use them as offsets when applying the mask
	// to the filter.
	extract4x64(Y1, X1, X10, R8, R9, R10, R11)
	// Apply masks to the filter; this operation is sensitive to aliasing: when
	// blocks overlap, the CPU has to serialize the reads and writes, which has
	// a measurable impact on throughput. This would be frequent for small bloom
	// filters which may have only a few blocks, the probability of seeing
	// overlapping blocks on large filters should be small enough to make this
	// a non-issue though.
	applyMask(Y6, (AX)(R8*1))
	applyMask(Y7, (AX)(R9*1))
	applyMask(Y8, (AX)(R10*1))
	applyMask(Y9, (AX)(R11*1))
	ADDQ $4, SI
	JMP  avx2loop4x64
avx2loop1x64:
	// Compute trailing elements in `x` if the length was not a multiple of 4.
	// This is the same algorithm as the one in the loop4x64 section, working
	// on a single mask/block pair at a time.
	CMPQ SI, DX
	JE   avx2done
	MOVQ         (BX)(SI*8), R8
	VPBROADCASTD (BX)(SI*8), Y0
	fasthash1x64(CX, R8)
	generateMask(Y0, Y1)
	applyMask(Y1, (AX)(R8*1))
	INCQ SI
	JMP  avx2loop1x64
avx2done:
	VZEROUPPER
	JMP done
fallback:
	// Scalar path: hash each element to a block offset and set the eight
	// salted bits one word at a time.
	XORQ SI, SI
	MOVQ DX, DI
	MOVQ CX, R10
loop:
	CMPQ SI, DI
	JE   done
	MOVLQZX (BX)(SI*8), R8
	MOVQ    (BX)(SI*8), R9
	fasthash1x64(R10, R9)
	insert($salt0, R8, 0(AX)(R9*1))
	insert($salt1, R8, 4(AX)(R9*1))
	insert($salt2, R8, 8(AX)(R9*1))
	insert($salt3, R8, 12(AX)(R9*1))
	insert($salt4, R8, 16(AX)(R9*1))
	insert($salt5, R8, 20(AX)(R9*1))
	insert($salt6, R8, 24(AX)(R9*1))
	insert($salt7, R8, 28(AX)(R9*1))
	INCQ SI
	JMP  loop
done:
	RET
// func filterInsert(f []Block, x uint64)
//
// Inserts the hash x into the block it maps to in f.
TEXT ·filterInsert(SB), NOSPLIT, $0-32
	MOVQ f_base+0(FP), AX
	MOVQ f_len+8(FP), BX
	MOVQ x+24(FP), CX
	// CX becomes the byte offset of the target block within the filter.
	fasthash1x64(BX, CX)
	CMPB ·hasAVX2(SB), $0
	JE   fallback
avx2:
	VPBROADCASTD x+24(FP), Y1
	generateMask(Y1, Y0)
	applyMask(Y0, (AX)(CX*1))
	VZEROUPPER
	RET
fallback:
	// Scalar path: set the salted bit of each of the eight words in turn.
	ADDQ CX, AX
	MOVL x+24(FP), BX
	insert($salt0, BX, 0(AX))
	insert($salt1, BX, 4(AX))
	insert($salt2, BX, 8(AX))
	insert($salt3, BX, 12(AX))
	insert($salt4, BX, 16(AX))
	insert($salt5, BX, 20(AX))
	insert($salt6, BX, 24(AX))
	insert($salt7, BX, 28(AX))
	RET
// func filterCheck(f []Block, x uint64) bool
//
// Reports whether the hash x may have been inserted in f.
TEXT ·filterCheck(SB), NOSPLIT, $0-33
	MOVQ f_base+0(FP), AX
	MOVQ f_len+8(FP), BX
	MOVQ x+24(FP), CX
	// CX becomes the byte offset of the target block within the filter.
	fasthash1x64(BX, CX)
	CMPB ·hasAVX2(SB), $0
	JE   fallback
avx2:
	VPBROADCASTD x+24(FP), Y1
	generateMask(Y1, Y0)
	// Intersect the mask with the block; CF is set when every bit of the
	// mask is also present in the block (see blockCheck in block_amd64.s).
	VPAND  (AX)(CX*1), Y0, Y1
	VPTEST Y0, Y1
	SETCS  ret+32(FP)
	VZEROUPPER
	RET
fallback:
	// Scalar path: each check macro jumps to notfound as soon as one of the
	// eight salted bits is missing.
	ADDQ CX, AX
	MOVL x+24(FP), BX
	check($salt0, 0(AX), BX)
	check($salt1, 4(AX), BX)
	check($salt2, 8(AX), BX)
	check($salt3, 12(AX), BX)
	check($salt4, 16(AX), BX)
	check($salt5, 20(AX), BX)
	check($salt6, 24(AX), BX)
	check($salt7, 28(AX), BX)
	MOVB $1, CX
	JMP  done
notfound:
	XORB CX, CX
done:
	MOVB CX, ret+32(FP)
	RET
================================================
FILE: bloom/filter_default.go
================================================
//go:build purego || !amd64
package bloom
// filterInsertBulk inserts every hash of x into the filter f, one at a time.
func filterInsertBulk(f []Block, x []uint64) {
	for _, h := range x {
		filterInsert(f, h)
	}
}
// filterInsert adds the hash x to the block it maps to in f.
func filterInsert(f []Block, x uint64) {
	i := fasthash1x64(x, int32(len(f)))
	f[i].Insert(uint32(x))
}

// filterCheck reports whether the hash x may have been inserted in f.
func filterCheck(f []Block, x uint64) bool {
	i := fasthash1x64(x, int32(len(f)))
	return f[i].Check(uint32(x))
}
================================================
FILE: bloom/filter_test.go
================================================
package bloom_test
import (
"bytes"
"math/rand"
"testing"
"github.com/segmentio/parquet-go/bloom"
)
// TestSplitBlockFilter inserts N pseudo-random keys (half one at a time, half
// in bulk), then verifies that every inserted key is reported present and
// that the false positive rate stays under 1%, both on the in-memory filter
// and on its serialized form read back through CheckSplitBlock. It finishes
// by checking that Reset zeroes the whole filter.
func TestSplitBlockFilter(t *testing.T) {
	const N = 1000
	const S = 3
	f := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(N, 10))
	p := rand.New(rand.NewSource(S))
	// Half of the values are inserted individually.
	for i := 0; i < N/2; i++ {
		f.Insert(p.Uint64())
	}
	// The other half is inserted as a bulk operation.
	b := make([]uint64, N/2)
	for i := range b {
		b[i] = p.Uint64()
	}
	f.InsertBulk(b)
	if f.Block(0) == nil {
		t.Fatal("looking up filter block returned impossible nil value")
	}
	for _, test := range []struct {
		scenario string
		filter   bloom.Filter
	}{
		{scenario: "filter", filter: f},
		{scenario: "reader", filter: newSerializedFilter(f.Bytes())},
	} {
		t.Run(test.scenario, func(t *testing.T) {
			// Replay the same pseudo-random sequence used for the inserts.
			p.Seed(S)
			falsePositives := 0
			for i := 0; i < N; i++ {
				x := p.Uint64()
				if !test.filter.Check(x) {
					t.Fatalf("bloom filter block does not contain the value #%d that was inserted: %d", i, x)
				}
				// ^x was never inserted; any hit is a false positive.
				if test.filter.Check(^x) {
					falsePositives++
				}
			}
			if r := (float64(falsePositives) / N); r > 0.01 {
				t.Fatalf("bloom filter triggered too many false positives: %g%%", r*100)
			}
		})
	}
	t.Run("Reset", func(t *testing.T) {
		// Sanity check: the filter must hold at least one non-zero byte
		// before Reset for the zeroing assertion below to be meaningful.
		allZeros := true
		for _, b := range f.Bytes() {
			if b != 0 {
				allZeros = false
				break
			}
		}
		if allZeros {
			t.Fatal("bloom filter bytes were all zero after inserting keys")
		}
		f.Reset()
		for i, b := range f.Bytes() {
			if b != 0 {
				t.Fatalf("bloom filter byte at index %d was not zero after resetting the filter: %02X", i, b)
			}
		}
	})
}
// TestSplitBlockFilterBug1 is a regression test for the single-key bulk
// insert path.
//
// This test exercises the case where we bulk insert a single key in the
// filter, which skips the core of the optimized assembly routines and runs
// through the loop handling tails of remaining keys after consuming groups
// of two or more.
//
// The use of quick.Check in bloom filter tests of the parquet package had
// uncovered a bug which was reproduced here in isolation when debugging.
func TestSplitBlockFilterBug1(t *testing.T) {
	// A single hash value, forcing the bulk insert into the tail loop.
	h := [1]uint64{0b1000101001000001001001111000000100011011001000011110011100110000}
	f := make(bloom.SplitBlockFilter, 1)
	f.InsertBulk(h[:])
	if !f.Check(h[0]) {
		t.Error("value inserted in the filter was not found")
	}
}
// serializedFilter adapts a bytes.Reader over a serialized bloom filter to
// the bloom.Filter interface by probing it through CheckSplitBlock.
type serializedFilter struct {
	bytes.Reader
}

// Check probes the serialized filter for x, ignoring read errors (a failed
// read simply reports the computed result).
func (f *serializedFilter) Check(x uint64) bool {
	found, _ := bloom.CheckSplitBlock(&f.Reader, f.Size(), x)
	return found
}

// newSerializedFilter wraps the serialized filter bytes b in a
// serializedFilter ready for Check calls.
func newSerializedFilter(b []byte) *serializedFilter {
	f := &serializedFilter{}
	f.Reset(b)
	return f
}
// BenchmarkFilterInsertBulk measures bulk insertion of 16 pseudo-random
// hashes per iteration into a 99-block filter.
func BenchmarkFilterInsertBulk(b *testing.B) {
	filter := make(bloom.SplitBlockFilter, 99)
	hashes := make([]uint64, 16)
	src := rand.NewSource(0).(rand.Source64)
	for i := range hashes {
		hashes[i] = src.Uint64()
	}
	for i := 0; i < b.N; i++ {
		filter.InsertBulk(hashes)
	}
	b.SetBytes(bloom.BlockSize * int64(len(hashes)))
}

// BenchmarkFilterInsert measures single-hash insertion into a one-block
// filter.
func BenchmarkFilterInsert(b *testing.B) {
	filter := make(bloom.SplitBlockFilter, 1)
	for i := 0; i < b.N; i++ {
		filter.Insert(uint64(i))
	}
	b.SetBytes(bloom.BlockSize)
}

// BenchmarkFilterCheck measures lookup of a present hash in a one-block
// filter.
func BenchmarkFilterCheck(b *testing.B) {
	filter := make(bloom.SplitBlockFilter, 1)
	filter.Insert(42)
	for i := 0; i < b.N; i++ {
		filter.Check(42)
	}
	b.SetBytes(bloom.BlockSize)
}
================================================
FILE: bloom/hash.go
================================================
package bloom
import "github.com/segmentio/parquet-go/bloom/xxhash"
// Hash is an interface abstracting the hashing algorithm used in bloom filters.
//
// Hash instances must be safe to use concurrently from multiple goroutines.
type Hash interface {
	// Returns the 64 bit hash of the value passed as argument.
	Sum64(value []byte) uint64

	// Compute hashes of individual values of primitive types.
	Sum64Uint8(value uint8) uint64
	Sum64Uint16(value uint16) uint64
	Sum64Uint32(value uint32) uint64
	Sum64Uint64(value uint64) uint64
	Sum64Uint128(value [16]byte) uint64

	// Compute hashes of the array of fixed size values passed as arguments,
	// returning the number of hashes written to the destination buffer.
	// NOTE(review): presumably dst[i] receives the hash of src[i] — confirm
	// against the xxhash package implementations.
	MultiSum64Uint8(dst []uint64, src []uint8) int
	MultiSum64Uint16(dst []uint64, src []uint16) int
	MultiSum64Uint32(dst []uint64, src []uint32) int
	MultiSum64Uint64(dst []uint64, src []uint64) int
	MultiSum64Uint128(dst []uint64, src [][16]byte) int
}
// XXH64 is an implementation of the Hash interface using the XXH64 algorithm.
// Every method delegates directly to the bloom/xxhash package.
type XXH64 struct{}

func (XXH64) Sum64(data []byte) uint64 { return xxhash.Sum64(data) }

func (XXH64) Sum64Uint8(value uint8) uint64 { return xxhash.Sum64Uint8(value) }

func (XXH64) Sum64Uint16(value uint16) uint64 { return xxhash.Sum64Uint16(value) }

func (XXH64) Sum64Uint32(value uint32) uint64 { return xxhash.Sum64Uint32(value) }

func (XXH64) Sum64Uint64(value uint64) uint64 { return xxhash.Sum64Uint64(value) }

func (XXH64) Sum64Uint128(value [16]byte) uint64 { return xxhash.Sum64Uint128(value) }

func (XXH64) MultiSum64Uint8(hashes []uint64, values []uint8) int {
	return xxhash.MultiSum64Uint8(hashes, values)
}

func (XXH64) MultiSum64Uint16(hashes []uint64, values []uint16) int {
	return xxhash.MultiSum64Uint16(hashes, values)
}

func (XXH64) MultiSum64Uint32(hashes []uint64, values []uint32) int {
	return xxhash.MultiSum64Uint32(hashes, values)
}

func (XXH64) MultiSum64Uint64(hashes []uint64, values []uint64) int {
	return xxhash.MultiSum64Uint64(hashes, values)
}

func (XXH64) MultiSum64Uint128(hashes []uint64, values [][16]byte) int {
	return xxhash.MultiSum64Uint128(hashes, values)
}

var _ Hash = XXH64{}
================================================
FILE: bloom/xxhash/LICENSE
================================================
The following files in this directory were derived from the open-source
project at https://github.com/cespare/xxhash. A copy of the original
license is provided below.
------------------------------------------------------------------------
Copyright (c) 2016 Caleb Spare
MIT License
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: bloom/xxhash/sum64uint.go
================================================
package xxhash
// Sum64Uint8 returns the XXH64 hash of a single byte value.
func Sum64Uint8(v uint8) uint64 {
	acc := prime5 + 1
	acc ^= uint64(v) * prime5
	return avalanche(rol11(acc) * prime1)
}

// Sum64Uint16 returns the XXH64 hash of v, folding in its low byte first and
// its high byte second.
func Sum64Uint16(v uint16) uint64 {
	acc := prime5 + 2
	acc ^= uint64(v&0xFF) * prime5
	acc = rol11(acc) * prime1
	acc ^= uint64(v>>8) * prime5
	acc = rol11(acc) * prime1
	return avalanche(acc)
}

// Sum64Uint32 returns the XXH64 hash of a 4-byte value.
func Sum64Uint32(v uint32) uint64 {
	acc := prime5 + 4
	acc ^= uint64(v) * prime1
	return avalanche(rol23(acc)*prime2 + prime3)
}

// Sum64Uint64 returns the XXH64 hash of an 8-byte value.
func Sum64Uint64(v uint64) uint64 {
	acc := prime5 + 8
	acc ^= round(0, v)
	return avalanche(rol27(acc)*prime1 + prime4)
}

// Sum64Uint128 returns the XXH64 hash of a 16-byte value, folding in its two
// 8-byte halves in order.
func Sum64Uint128(v [16]byte) uint64 {
	acc := prime5 + 16
	acc ^= round(0, u64(v[:8]))
	acc = rol27(acc)*prime1 + prime4
	acc ^= round(0, u64(v[8:]))
	acc = rol27(acc)*prime1 + prime4
	return avalanche(acc)
}
================================================
FILE: bloom/xxhash/sum64uint_amd64.go
================================================
//go:build !purego
package xxhash
import "golang.org/x/sys/cpu"
// This file contains the declaration of signatures for the multi hashing
// functions implemented in sum64uint_amd64.s, which provides vectorized
// versions of the algorithms written in sum64uint_purego.go.
//
// The use of SIMD optimization yields measurable throughput increases when
// computing multiple hash values in parallel compared to hashing values
// individually in loops:
//
// name old speed new speed delta
// MultiSum64Uint8/4KB 4.94GB/s ± 2% 6.82GB/s ± 5% +38.00% (p=0.000 n=10+10)
// MultiSum64Uint16/4KB 3.44GB/s ± 2% 4.63GB/s ± 4% +34.56% (p=0.000 n=10+10)
// MultiSum64Uint32/4KB 4.84GB/s ± 2% 6.39GB/s ± 4% +31.94% (p=0.000 n=10+10)
// MultiSum64Uint64/4KB 3.77GB/s ± 2% 4.95GB/s ± 2% +31.14% (p=0.000 n=9+10)
// MultiSum64Uint128/4KB 1.84GB/s ± 2% 3.11GB/s ± 4% +68.70% (p=0.000 n=9+10)
//
// name old hash/s new hash/s delta
// MultiSum64Uint8/4KB 617M ± 2% 852M ± 5% +38.00% (p=0.000 n=10+10)
// MultiSum64Uint16/4KB 431M ± 2% 579M ± 4% +34.56% (p=0.000 n=10+10)
// MultiSum64Uint32/4KB 605M ± 2% 799M ± 4% +31.94% (p=0.000 n=10+10)
// MultiSum64Uint64/4KB 471M ± 2% 618M ± 2% +31.14% (p=0.000 n=9+10)
// MultiSum64Uint128/4KB 231M ± 2% 389M ± 4% +68.70% (p=0.000 n=9+10)
//
// The benchmarks measure the throughput of hashes produced, as a rate of values
// and bytes.
// hasAVX512 is set at program start when the CPU advertises the AVX-512
// features used by the assembly routines (foundation and conflict
// detection).
var hasAVX512 = cpu.X86.HasAVX512 &&
	cpu.X86.HasAVX512F &&
	cpu.X86.HasAVX512CD

// MultiSum64Uint8 hashes each element of v into h and returns the number of
// hashes written. Implemented in sum64uint_amd64.s.
//go:noescape
func MultiSum64Uint8(h []uint64, v []uint8) int

// MultiSum64Uint16 hashes each element of v into h and returns the number of
// hashes written. Implemented in sum64uint_amd64.s.
//go:noescape
func MultiSum64Uint16(h []uint64, v []uint16) int

// MultiSum64Uint32 hashes each element of v into h and returns the number of
// hashes written. Implemented in sum64uint_amd64.s.
//go:noescape
func MultiSum64Uint32(h []uint64, v []uint32) int

// MultiSum64Uint64 hashes each element of v into h and returns the number of
// hashes written. Implemented in sum64uint_amd64.s.
//go:noescape
func MultiSum64Uint64(h []uint64, v []uint64) int

// MultiSum64Uint128 hashes each 16-byte element of v into h and returns the
// number of hashes written. Implemented in sum64uint_amd64.s.
//go:noescape
func MultiSum64Uint128(h []uint64, v [][16]byte) int
================================================
FILE: bloom/xxhash/sum64uint_amd64.s
================================================
//go:build !purego
#include "textflag.h"
/*
The algorithms in this file are assembly versions of the Go functions in the
sum64uint_default.go file.
The implementations are mostly direct translations of the Go code to assembly,
leveraging SIMD instructions to process chunks of the input variables in
parallel at each loop iteration. To maximize utilization of the CPU capacity,
some of the functions unroll two steps of the vectorized loop per iteration,
which yields further throughput because the CPU is able to process some of the
instruction from the two steps in parallel due to having no data dependencies
between the inputs and outputs.
The use of AVX-512 yields a significant increase in throughput on all the
algorithms, in most part thanks to the VPMULLQ instructions which compute
8 x 64 bits multiplication. There were no equivalent instruction in AVX2, which
required emulating vector multiplication with a combination of 32 bits multiply,
additions, shifts, and masks: the amount of instructions and data dependencies
resulted in the AVX2 code yielding equivalent performance characteristics for a
much higher complexity.
The benchmark results below showcase the improvements that the AVX-512 code
yields on the XXH64 algorithms:
name old speed new speed delta
MultiSum64Uint8/4KB 4.97GB/s ± 0% 14.59GB/s ± 1% +193.73% (p=0.000 n=10+10)
MultiSum64Uint16/4KB 3.55GB/s ± 0% 9.46GB/s ± 0% +166.20% (p=0.000 n=10+9)
MultiSum64Uint32/4KB 4.48GB/s ± 0% 13.93GB/s ± 1% +210.93% (p=0.000 n=10+10)
MultiSum64Uint64/4KB 3.57GB/s ± 0% 11.12GB/s ± 1% +211.73% (p=0.000 n=9+10)
MultiSum64Uint128/4KB 2.54GB/s ± 0% 6.49GB/s ± 1% +155.69% (p=0.000 n=10+10)
name old hash/s new hash/s delta
MultiSum64Uint8/4KB 621M ± 0% 1823M ± 1% +193.73% (p=0.000 n=10+10)
MultiSum64Uint16/4KB 444M ± 0% 1182M ± 0% +166.20% (p=0.000 n=10+9)
MultiSum64Uint32/4KB 560M ± 0% 1742M ± 1% +210.93% (p=0.000 n=10+10)
MultiSum64Uint64/4KB 446M ± 0% 1391M ± 1% +211.73% (p=0.000 n=9+10)
MultiSum64Uint128/4KB 317M ± 0% 811M ± 1% +155.69% (p=0.000 n=10+10)
The functions perform runtime detection of AVX-512 support by testing the value
of the xxhash.hasAVX512 variable declared and initialized in sum64uint_amd64.go.
Branch mispredictions on those tests are very unlikely since the value is never
modified by the application. The cost of the comparisons are also amortized by
the bulk APIs of the MultiSum64* functions (a single test is required per call).
If a bug is suspected in the vectorized code, compiling the program or running
the tests with -tags=purego can help verify whether the behavior changes when
the program does not use the assembly versions.
Maintenance of these functions can be complex; however, the XXH64 algorithm is
unlikely to evolve, and the implementations unlikely to change. The tests in
sum64uint_test.go compare the outputs of MultiSum64* functions with the
reference xxhash.Sum64 function, future maintainers can rely on those tests
passing as a guarantee that they have not introduced regressions.
*/
// Prime constants of the XXH64 algorithm, identical to the ones used by the
// reference implementation.
#define PRIME1 0x9E3779B185EBCA87
#define PRIME2 0xC2B2AE3D27D4EB4F
#define PRIME3 0x165667B19E3779F9
#define PRIME4 0x85EBCA77C2B2AE63
#define PRIME5 0x27D4EB2F165667C5
// General purpose registers holding the primes for the scalar code paths.
#define prime1 R12
#define prime2 R13
#define prime3 R14
#define prime4 R15
#define prime5 R15 // same as prime4 because they are not used together
// ZMM registers holding 8 copies of each prime for the AVX-512 code paths.
#define prime1ZMM Z12
#define prime2ZMM Z13
#define prime3ZMM Z14
#define prime4ZMM Z15
#define prime5ZMM Z15
// prime1vec: PRIME1 broadcast to the 8 lanes of a 512-bit vector.
DATA prime1vec<>+0(SB)/8, $PRIME1
DATA prime1vec<>+8(SB)/8, $PRIME1
DATA prime1vec<>+16(SB)/8, $PRIME1
DATA prime1vec<>+24(SB)/8, $PRIME1
DATA prime1vec<>+32(SB)/8, $PRIME1
DATA prime1vec<>+40(SB)/8, $PRIME1
DATA prime1vec<>+48(SB)/8, $PRIME1
DATA prime1vec<>+56(SB)/8, $PRIME1
GLOBL prime1vec<>(SB), RODATA|NOPTR, $64
// prime2vec: PRIME2 broadcast to the 8 lanes of a 512-bit vector.
DATA prime2vec<>+0(SB)/8, $PRIME2
DATA prime2vec<>+8(SB)/8, $PRIME2
DATA prime2vec<>+16(SB)/8, $PRIME2
DATA prime2vec<>+24(SB)/8, $PRIME2
DATA prime2vec<>+32(SB)/8, $PRIME2
DATA prime2vec<>+40(SB)/8, $PRIME2
DATA prime2vec<>+48(SB)/8, $PRIME2
DATA prime2vec<>+56(SB)/8, $PRIME2
GLOBL prime2vec<>(SB), RODATA|NOPTR, $64
// prime3vec: PRIME3 broadcast to the 8 lanes of a 512-bit vector.
DATA prime3vec<>+0(SB)/8, $PRIME3
DATA prime3vec<>+8(SB)/8, $PRIME3
DATA prime3vec<>+16(SB)/8, $PRIME3
DATA prime3vec<>+24(SB)/8, $PRIME3
DATA prime3vec<>+32(SB)/8, $PRIME3
DATA prime3vec<>+40(SB)/8, $PRIME3
DATA prime3vec<>+48(SB)/8, $PRIME3
DATA prime3vec<>+56(SB)/8, $PRIME3
GLOBL prime3vec<>(SB), RODATA|NOPTR, $64
// prime4vec: PRIME4 broadcast to the 8 lanes of a 512-bit vector.
DATA prime4vec<>+0(SB)/8, $PRIME4
DATA prime4vec<>+8(SB)/8, $PRIME4
DATA prime4vec<>+16(SB)/8, $PRIME4
DATA prime4vec<>+24(SB)/8, $PRIME4
DATA prime4vec<>+32(SB)/8, $PRIME4
DATA prime4vec<>+40(SB)/8, $PRIME4
DATA prime4vec<>+48(SB)/8, $PRIME4
DATA prime4vec<>+56(SB)/8, $PRIME4
GLOBL prime4vec<>(SB), RODATA|NOPTR, $64
// prime5vec: PRIME5 broadcast to the 8 lanes of a 512-bit vector.
DATA prime5vec<>+0(SB)/8, $PRIME5
DATA prime5vec<>+8(SB)/8, $PRIME5
DATA prime5vec<>+16(SB)/8, $PRIME5
DATA prime5vec<>+24(SB)/8, $PRIME5
DATA prime5vec<>+32(SB)/8, $PRIME5
DATA prime5vec<>+40(SB)/8, $PRIME5
DATA prime5vec<>+48(SB)/8, $PRIME5
DATA prime5vec<>+56(SB)/8, $PRIME5
GLOBL prime5vec<>(SB), RODATA|NOPTR, $64
// prime5vecN: 8 lanes of PRIME5+N. XXH64 seeds the accumulator of a short
// input with PRIME5 plus the input length, so PRIME5+N is the initial
// accumulator for N-byte inputs.
DATA prime5vec1<>+0(SB)/8, $PRIME5+1
DATA prime5vec1<>+8(SB)/8, $PRIME5+1
DATA prime5vec1<>+16(SB)/8, $PRIME5+1
DATA prime5vec1<>+24(SB)/8, $PRIME5+1
DATA prime5vec1<>+32(SB)/8, $PRIME5+1
DATA prime5vec1<>+40(SB)/8, $PRIME5+1
DATA prime5vec1<>+48(SB)/8, $PRIME5+1
DATA prime5vec1<>+56(SB)/8, $PRIME5+1
GLOBL prime5vec1<>(SB), RODATA|NOPTR, $64
DATA prime5vec2<>+0(SB)/8, $PRIME5+2
DATA prime5vec2<>+8(SB)/8, $PRIME5+2
DATA prime5vec2<>+16(SB)/8, $PRIME5+2
DATA prime5vec2<>+24(SB)/8, $PRIME5+2
DATA prime5vec2<>+32(SB)/8, $PRIME5+2
DATA prime5vec2<>+40(SB)/8, $PRIME5+2
DATA prime5vec2<>+48(SB)/8, $PRIME5+2
DATA prime5vec2<>+56(SB)/8, $PRIME5+2
GLOBL prime5vec2<>(SB), RODATA|NOPTR, $64
DATA prime5vec4<>+0(SB)/8, $PRIME5+4
DATA prime5vec4<>+8(SB)/8, $PRIME5+4
DATA prime5vec4<>+16(SB)/8, $PRIME5+4
DATA prime5vec4<>+24(SB)/8, $PRIME5+4
DATA prime5vec4<>+32(SB)/8, $PRIME5+4
DATA prime5vec4<>+40(SB)/8, $PRIME5+4
DATA prime5vec4<>+48(SB)/8, $PRIME5+4
DATA prime5vec4<>+56(SB)/8, $PRIME5+4
GLOBL prime5vec4<>(SB), RODATA|NOPTR, $64
DATA prime5vec8<>+0(SB)/8, $PRIME5+8
DATA prime5vec8<>+8(SB)/8, $PRIME5+8
DATA prime5vec8<>+16(SB)/8, $PRIME5+8
DATA prime5vec8<>+24(SB)/8, $PRIME5+8
DATA prime5vec8<>+32(SB)/8, $PRIME5+8
DATA prime5vec8<>+40(SB)/8, $PRIME5+8
DATA prime5vec8<>+48(SB)/8, $PRIME5+8
DATA prime5vec8<>+56(SB)/8, $PRIME5+8
GLOBL prime5vec8<>(SB), RODATA|NOPTR, $64
DATA prime5vec16<>+0(SB)/8, $PRIME5+16
DATA prime5vec16<>+8(SB)/8, $PRIME5+16
DATA prime5vec16<>+16(SB)/8, $PRIME5+16
DATA prime5vec16<>+24(SB)/8, $PRIME5+16
DATA prime5vec16<>+32(SB)/8, $PRIME5+16
DATA prime5vec16<>+40(SB)/8, $PRIME5+16
DATA prime5vec16<>+48(SB)/8, $PRIME5+16
DATA prime5vec16<>+56(SB)/8, $PRIME5+16
GLOBL prime5vec16<>(SB), RODATA|NOPTR, $64
// lowbytemask: 0xFF in each 64-bit lane, used to isolate the low byte of
// every lane.
DATA lowbytemask<>+0(SB)/8, $0xFF
DATA lowbytemask<>+8(SB)/8, $0xFF
DATA lowbytemask<>+16(SB)/8, $0xFF
DATA lowbytemask<>+24(SB)/8, $0xFF
DATA lowbytemask<>+32(SB)/8, $0xFF
DATA lowbytemask<>+40(SB)/8, $0xFF
DATA lowbytemask<>+48(SB)/8, $0xFF
DATA lowbytemask<>+56(SB)/8, $0xFF
GLOBL lowbytemask<>(SB), RODATA|NOPTR, $64
// Index vectors for VPERMI2Q: bit 3 of each index selects between the two
// source vectors, the low 3 bits select a quadword in that source. These
// gather the even-numbered (vpermi2qeven) and odd-numbered (vpermi2qodd)
// quadwords from the concatenation of two 512-bit inputs.
DATA vpermi2qeven<>+0(SB)/8, $0
DATA vpermi2qeven<>+8(SB)/8, $2
DATA vpermi2qeven<>+16(SB)/8, $4
DATA vpermi2qeven<>+24(SB)/8, $6
DATA vpermi2qeven<>+32(SB)/8, $(1<<3)|0
DATA vpermi2qeven<>+40(SB)/8, $(1<<3)|2
DATA vpermi2qeven<>+48(SB)/8, $(1<<3)|4
DATA vpermi2qeven<>+56(SB)/8, $(1<<3)|6
GLOBL vpermi2qeven<>(SB), RODATA|NOPTR, $64
DATA vpermi2qodd<>+0(SB)/8, $1
DATA vpermi2qodd<>+8(SB)/8, $3
DATA vpermi2qodd<>+16(SB)/8, $5
DATA vpermi2qodd<>+24(SB)/8, $7
DATA vpermi2qodd<>+32(SB)/8, $(1<<3)|1
DATA vpermi2qodd<>+40(SB)/8, $(1<<3)|3
DATA vpermi2qodd<>+48(SB)/8, $(1<<3)|5
DATA vpermi2qodd<>+56(SB)/8, $(1<<3)|7
GLOBL vpermi2qodd<>(SB), RODATA|NOPTR, $64
// round performs one XXH64 accumulation step:
// acc = rol31(acc + input*PRIME2) * PRIME1. The input register is clobbered.
#define round(input, acc) \
	IMULQ prime2, input \
	ADDQ  input, acc \
	ROLQ  $31, acc \
	IMULQ prime1, acc
// avalanche applies the XXH64 finalization mix to acc, using tmp as scratch.
#define avalanche(tmp, acc) \
	MOVQ  acc, tmp \
	SHRQ  $33, tmp \
	XORQ  tmp, acc \
	IMULQ prime2, acc \
	MOVQ  acc, tmp \
	SHRQ  $29, tmp \
	XORQ  tmp, acc \
	IMULQ prime3, acc \
	MOVQ  acc, tmp \
	SHRQ  $32, tmp \
	XORQ  tmp, acc
// round8x64 is the 8-lane AVX-512 equivalent of round; the input vector is
// clobbered.
#define round8x64(input, acc) \
	VPMULLQ prime2ZMM, input, input \
	VPADDQ  input, acc, acc \
	VPROLQ  $31, acc, acc \
	VPMULLQ prime1ZMM, acc, acc
// avalanche8x64 is the 8-lane AVX-512 equivalent of avalanche; tmp is
// clobbered.
#define avalanche8x64(tmp, acc) \
	VPSRLQ  $33, acc, tmp \
	VPXORQ  tmp, acc, acc \
	VPMULLQ prime2ZMM, acc, acc \
	VPSRLQ  $29, acc, tmp \
	VPXORQ  tmp, acc, acc \
	VPMULLQ prime3ZMM, acc, acc \
	VPSRLQ  $32, acc, tmp \
	VPXORQ  tmp, acc, acc
// func MultiSum64Uint8(h []uint64, v []uint8) int
//
// Computes the XXH64 digest of each 1-byte value of v and writes it to the
// matching index of h. Returns min(len(h), len(v)), the number of digests
// written.
TEXT ·MultiSum64Uint8(SB), NOSPLIT, $0-54
	MOVQ $PRIME1, prime1
	MOVQ $PRIME2, prime2
	MOVQ $PRIME3, prime3
	MOVQ $PRIME5, prime5
	MOVQ h_base+0(FP), AX
	MOVQ h_len+8(FP), CX
	MOVQ v_base+24(FP), BX
	MOVQ v_len+32(FP), DX
	// CX = min(len(h), len(v)); it is also the return value.
	CMPQ CX, DX
	CMOVQGT DX, CX
	MOVQ CX, ret+48(FP)
	XORQ SI, SI
	// Use the scalar loop for short inputs or when AVX-512 is unavailable.
	CMPQ CX, $32
	JB loop
	CMPB ·hasAVX512(SB), $0
	JE loop
	// DI = CX rounded down to a multiple of 32; the vectorized loop hashes
	// 32 values (4 groups of 8 lanes) per iteration.
	MOVQ CX, DI
	SHRQ $5, DI
	SHLQ $5, DI
	VMOVDQU64 prime1vec<>(SB), prime1ZMM
	VMOVDQU64 prime2vec<>(SB), prime2ZMM
	VMOVDQU64 prime3vec<>(SB), prime3ZMM
	VMOVDQU64 prime5vec<>(SB), prime5ZMM
	VMOVDQU64 prime5vec1<>(SB), Z6
loop32x64:
	// Seed 4 groups of 8 accumulators with PRIME5+1 (PRIME5 + input length).
	VMOVDQA64 Z6, Z0
	VMOVDQA64 Z6, Z3
	VMOVDQA64 Z6, Z20
	VMOVDQA64 Z6, Z23
	// Zero-extend 4 groups of 8 input bytes into 64-bit lanes.
	VPMOVZXBQ (BX)(SI*1), Z1
	VPMOVZXBQ 8(BX)(SI*1), Z4
	VPMOVZXBQ 16(BX)(SI*1), Z21
	VPMOVZXBQ 24(BX)(SI*1), Z24
	// Single-byte tail step of XXH64: acc = rol11(acc ^ (byte*PRIME5)) * PRIME1.
	VPMULLQ prime5ZMM, Z1, Z1
	VPMULLQ prime5ZMM, Z4, Z4
	VPMULLQ prime5ZMM, Z21, Z21
	VPMULLQ prime5ZMM, Z24, Z24
	VPXORQ Z1, Z0, Z0
	VPXORQ Z4, Z3, Z3
	VPXORQ Z21, Z20, Z20
	VPXORQ Z24, Z23, Z23
	VPROLQ $11, Z0, Z0
	VPROLQ $11, Z3, Z3
	VPROLQ $11, Z20, Z20
	VPROLQ $11, Z23, Z23
	VPMULLQ prime1ZMM, Z0, Z0
	VPMULLQ prime1ZMM, Z3, Z3
	VPMULLQ prime1ZMM, Z20, Z20
	VPMULLQ prime1ZMM, Z23, Z23
	// Final mix, then store 32 digests.
	avalanche8x64(Z1, Z0)
	avalanche8x64(Z4, Z3)
	avalanche8x64(Z21, Z20)
	avalanche8x64(Z24, Z23)
	VMOVDQU64 Z0, (AX)(SI*8)
	VMOVDQU64 Z3, 64(AX)(SI*8)
	VMOVDQU64 Z20, 128(AX)(SI*8)
	VMOVDQU64 Z23, 192(AX)(SI*8)
	ADDQ $32, SI
	CMPQ SI, DI
	JB loop32x64
	VZEROUPPER
loop:
	// Scalar tail: one value per iteration, same steps as the vector loop.
	CMPQ SI, CX
	JE done
	MOVQ $PRIME5+1, R8
	MOVBQZX (BX)(SI*1), R9
	IMULQ prime5, R9
	XORQ R9, R8
	ROLQ $11, R8
	IMULQ prime1, R8
	avalanche(R9, R8)
	MOVQ R8, (AX)(SI*8)
	INCQ SI
	JMP loop
done:
	RET
// func MultiSum64Uint16(h []uint64, v []uint16) int
//
// Computes the XXH64 digest of each 2-byte value of v and writes it to the
// matching index of h. Each value is hashed as two single-byte tail steps
// (low byte first), matching Sum64 on its little-endian encoding. Returns
// min(len(h), len(v)).
TEXT ·MultiSum64Uint16(SB), NOSPLIT, $0-54
	MOVQ $PRIME1, prime1
	MOVQ $PRIME2, prime2
	MOVQ $PRIME3, prime3
	MOVQ $PRIME5, prime5
	MOVQ h_base+0(FP), AX
	MOVQ h_len+8(FP), CX
	MOVQ v_base+24(FP), BX
	MOVQ v_len+32(FP), DX
	// CX = min(len(h), len(v)); it is also the return value.
	CMPQ CX, DX
	CMOVQGT DX, CX
	MOVQ CX, ret+48(FP)
	XORQ SI, SI
	// Use the scalar loop for short inputs or when AVX-512 is unavailable.
	CMPQ CX, $16
	JB loop
	CMPB ·hasAVX512(SB), $0
	JE loop
	// DI = CX rounded down to a multiple of 16; the vectorized loop hashes
	// 16 values (2 groups of 8 lanes) per iteration.
	MOVQ CX, DI
	SHRQ $4, DI
	SHLQ $4, DI
	VMOVDQU64 prime1vec<>(SB), prime1ZMM
	VMOVDQU64 prime2vec<>(SB), prime2ZMM
	VMOVDQU64 prime3vec<>(SB), prime3ZMM
	VMOVDQU64 prime5vec<>(SB), prime5ZMM
	VMOVDQU64 prime5vec2<>(SB), Z6
	VMOVDQU64 lowbytemask<>(SB), Z7
loop16x64:
	// Seed 2 groups of 8 accumulators with PRIME5+2 (PRIME5 + input length).
	VMOVDQA64 Z6, Z0
	VMOVDQA64 Z6, Z3
	// Zero-extend 16 input words into 64-bit lanes, then split each lane
	// into its high byte (Z8/Z9) and low byte (Z1/Z4).
	VPMOVZXWQ (BX)(SI*2), Z1
	VPMOVZXWQ 16(BX)(SI*2), Z4
	VMOVDQA64 Z1, Z8
	VMOVDQA64 Z4, Z9
	VPSRLQ $8, Z8, Z8
	VPSRLQ $8, Z9, Z9
	VPANDQ Z7, Z1, Z1
	VPANDQ Z7, Z4, Z4
	// First byte step: acc = rol11(acc ^ (lo*PRIME5)) * PRIME1.
	VPMULLQ prime5ZMM, Z1, Z1
	VPMULLQ prime5ZMM, Z4, Z4
	VPXORQ Z1, Z0, Z0
	VPXORQ Z4, Z3, Z3
	VPROLQ $11, Z0, Z0
	VPROLQ $11, Z3, Z3
	VPMULLQ prime1ZMM, Z0, Z0
	VPMULLQ prime1ZMM, Z3, Z3
	// Second byte step with the high byte.
	VPMULLQ prime5ZMM, Z8, Z8
	VPMULLQ prime5ZMM, Z9, Z9
	VPXORQ Z8, Z0, Z0
	VPXORQ Z9, Z3, Z3
	VPROLQ $11, Z0, Z0
	VPROLQ $11, Z3, Z3
	VPMULLQ prime1ZMM, Z0, Z0
	VPMULLQ prime1ZMM, Z3, Z3
	// Final mix, then store 16 digests.
	avalanche8x64(Z1, Z0)
	avalanche8x64(Z4, Z3)
	VMOVDQU64 Z0, (AX)(SI*8)
	VMOVDQU64 Z3, 64(AX)(SI*8)
	ADDQ $16, SI
	CMPQ SI, DI
	JB loop16x64
	VZEROUPPER
loop:
	// Scalar tail: one value per iteration, same two byte steps.
	CMPQ SI, CX
	JE done
	MOVQ $PRIME5+2, R8
	MOVWQZX (BX)(SI*2), R9
	MOVQ R9, R10
	SHRQ $8, R10
	ANDQ $0xFF, R9
	IMULQ prime5, R9
	XORQ R9, R8
	ROLQ $11, R8
	IMULQ prime1, R8
	IMULQ prime5, R10
	XORQ R10, R8
	ROLQ $11, R8
	IMULQ prime1, R8
	avalanche(R9, R8)
	MOVQ R8, (AX)(SI*8)
	INCQ SI
	JMP loop
done:
	RET
// func MultiSum64Uint32(h []uint64, v []uint32) int
//
// Computes the XXH64 digest of each 4-byte value of v and writes it to the
// matching index of h, applying the 4-byte tail step of the algorithm.
// Returns min(len(h), len(v)).
TEXT ·MultiSum64Uint32(SB), NOSPLIT, $0-54
	MOVQ $PRIME1, prime1
	MOVQ $PRIME2, prime2
	MOVQ $PRIME3, prime3
	MOVQ h_base+0(FP), AX
	MOVQ h_len+8(FP), CX
	MOVQ v_base+24(FP), BX
	MOVQ v_len+32(FP), DX
	// CX = min(len(h), len(v)); it is also the return value.
	CMPQ CX, DX
	CMOVQGT DX, CX
	MOVQ CX, ret+48(FP)
	XORQ SI, SI
	// Use the scalar loop for short inputs or when AVX-512 is unavailable.
	CMPQ CX, $32
	JB loop
	CMPB ·hasAVX512(SB), $0
	JE loop
	// DI = CX rounded down to a multiple of 32; the vectorized loop hashes
	// 32 values (4 groups of 8 lanes) per iteration.
	MOVQ CX, DI
	SHRQ $5, DI
	SHLQ $5, DI
	VMOVDQU64 prime1vec<>(SB), prime1ZMM
	VMOVDQU64 prime2vec<>(SB), prime2ZMM
	VMOVDQU64 prime3vec<>(SB), prime3ZMM
	VMOVDQU64 prime5vec4<>(SB), Z6
loop32x64:
	// Seed 4 groups of 8 accumulators with PRIME5+4 (PRIME5 + input length).
	VMOVDQA64 Z6, Z0
	VMOVDQA64 Z6, Z3
	VMOVDQA64 Z6, Z20
	VMOVDQA64 Z6, Z23
	// Zero-extend 4 groups of 8 input dwords into 64-bit lanes.
	VPMOVZXDQ (BX)(SI*4), Z1
	VPMOVZXDQ 32(BX)(SI*4), Z4
	VPMOVZXDQ 64(BX)(SI*4), Z21
	VPMOVZXDQ 96(BX)(SI*4), Z24
	// 4-byte tail step: acc = rol23(acc ^ (u32*PRIME1))*PRIME2 + PRIME3.
	VPMULLQ prime1ZMM, Z1, Z1
	VPMULLQ prime1ZMM, Z4, Z4
	VPMULLQ prime1ZMM, Z21, Z21
	VPMULLQ prime1ZMM, Z24, Z24
	VPXORQ Z1, Z0, Z0
	VPXORQ Z4, Z3, Z3
	VPXORQ Z21, Z20, Z20
	VPXORQ Z24, Z23, Z23
	VPROLQ $23, Z0, Z0
	VPROLQ $23, Z3, Z3
	VPROLQ $23, Z20, Z20
	VPROLQ $23, Z23, Z23
	VPMULLQ prime2ZMM, Z0, Z0
	VPMULLQ prime2ZMM, Z3, Z3
	VPMULLQ prime2ZMM, Z20, Z20
	VPMULLQ prime2ZMM, Z23, Z23
	VPADDQ prime3ZMM, Z0, Z0
	VPADDQ prime3ZMM, Z3, Z3
	VPADDQ prime3ZMM, Z20, Z20
	VPADDQ prime3ZMM, Z23, Z23
	// Final mix, then store 32 digests.
	avalanche8x64(Z1, Z0)
	avalanche8x64(Z4, Z3)
	avalanche8x64(Z21, Z20)
	avalanche8x64(Z24, Z23)
	VMOVDQU64 Z0, (AX)(SI*8)
	VMOVDQU64 Z3, 64(AX)(SI*8)
	VMOVDQU64 Z20, 128(AX)(SI*8)
	VMOVDQU64 Z23, 192(AX)(SI*8)
	ADDQ $32, SI
	CMPQ SI, DI
	JB loop32x64
	VZEROUPPER
loop:
	// Scalar tail: one value per iteration, same 4-byte step.
	CMPQ SI, CX
	JE done
	MOVQ $PRIME5+4, R8
	MOVLQZX (BX)(SI*4), R9
	IMULQ prime1, R9
	XORQ R9, R8
	ROLQ $23, R8
	IMULQ prime2, R8
	ADDQ prime3, R8
	avalanche(R9, R8)
	MOVQ R8, (AX)(SI*8)
	INCQ SI
	JMP loop
done:
	RET
// func MultiSum64Uint64(h []uint64, v []uint64) int
//
// Computes the XXH64 digest of each 8-byte value of v and writes it to the
// matching index of h, applying a single 8-byte tail step of the algorithm.
// Returns min(len(h), len(v)).
TEXT ·MultiSum64Uint64(SB), NOSPLIT, $0-54
	MOVQ $PRIME1, prime1
	MOVQ $PRIME2, prime2
	MOVQ $PRIME3, prime3
	MOVQ $PRIME4, prime4
	MOVQ h_base+0(FP), AX
	MOVQ h_len+8(FP), CX
	MOVQ v_base+24(FP), BX
	MOVQ v_len+32(FP), DX
	// CX = min(len(h), len(v)); it is also the return value.
	CMPQ CX, DX
	CMOVQGT DX, CX
	MOVQ CX, ret+48(FP)
	XORQ SI, SI
	// Use the scalar loop for short inputs or when AVX-512 is unavailable.
	CMPQ CX, $32
	JB loop
	CMPB ·hasAVX512(SB), $0
	JE loop
	// DI = CX rounded down to a multiple of 32; the vectorized loop hashes
	// 32 values (4 groups of 8 lanes) per iteration.
	MOVQ CX, DI
	SHRQ $5, DI
	SHLQ $5, DI
	VMOVDQU64 prime1vec<>(SB), prime1ZMM
	VMOVDQU64 prime2vec<>(SB), prime2ZMM
	VMOVDQU64 prime3vec<>(SB), prime3ZMM
	VMOVDQU64 prime4vec<>(SB), prime4ZMM
	VMOVDQU64 prime5vec8<>(SB), Z6
loop32x64:
	// Seed 4 groups of 8 accumulators with PRIME5+8 (PRIME5 + input length).
	VMOVDQA64 Z6, Z0
	VMOVDQA64 Z6, Z3
	VMOVDQA64 Z6, Z20
	VMOVDQA64 Z6, Z23
	VMOVDQU64 (BX)(SI*8), Z1
	VMOVDQU64 64(BX)(SI*8), Z4
	VMOVDQU64 128(BX)(SI*8), Z21
	VMOVDQU64 192(BX)(SI*8), Z24
	// k1 = round(0, input) for each lane.
	VPXORQ Z2, Z2, Z2
	VPXORQ Z5, Z5, Z5
	VPXORQ Z22, Z22, Z22
	VPXORQ Z25, Z25, Z25
	round8x64(Z1, Z2)
	round8x64(Z4, Z5)
	round8x64(Z21, Z22)
	round8x64(Z24, Z25)
	// 8-byte tail step: acc = rol27(acc ^ k1)*PRIME1 + PRIME4.
	VPXORQ Z2, Z0, Z0
	VPXORQ Z5, Z3, Z3
	VPXORQ Z22, Z20, Z20
	VPXORQ Z25, Z23, Z23
	VPROLQ $27, Z0, Z0
	VPROLQ $27, Z3, Z3
	VPROLQ $27, Z20, Z20
	VPROLQ $27, Z23, Z23
	VPMULLQ prime1ZMM, Z0, Z0
	VPMULLQ prime1ZMM, Z3, Z3
	VPMULLQ prime1ZMM, Z20, Z20
	VPMULLQ prime1ZMM, Z23, Z23
	VPADDQ prime4ZMM, Z0, Z0
	VPADDQ prime4ZMM, Z3, Z3
	VPADDQ prime4ZMM, Z20, Z20
	VPADDQ prime4ZMM, Z23, Z23
	// Final mix, then store 32 digests.
	avalanche8x64(Z1, Z0)
	avalanche8x64(Z4, Z3)
	avalanche8x64(Z21, Z20)
	avalanche8x64(Z24, Z23)
	VMOVDQU64 Z0, (AX)(SI*8)
	VMOVDQU64 Z3, 64(AX)(SI*8)
	VMOVDQU64 Z20, 128(AX)(SI*8)
	VMOVDQU64 Z23, 192(AX)(SI*8)
	ADDQ $32, SI
	CMPQ SI, DI
	JB loop32x64
	VZEROUPPER
loop:
	// Scalar tail: one value per iteration, same 8-byte step.
	CMPQ SI, CX
	JE done
	MOVQ $PRIME5+8, R8
	MOVQ (BX)(SI*8), R9
	XORQ R10, R10
	round(R9, R10)
	XORQ R10, R8
	ROLQ $27, R8
	IMULQ prime1, R8
	ADDQ prime4, R8
	avalanche(R9, R8)
	MOVQ R8, (AX)(SI*8)
	INCQ SI
	JMP loop
done:
	RET
// func MultiSum64Uint128(h []uint64, v [][16]byte) int
//
// Computes the XXH64 digest of each 16-byte value of v and writes it to the
// matching index of h, applying two 8-byte tail steps of the algorithm.
// Returns min(len(h), len(v)).
//
// Unlike the other functions, the vector loop advances BX directly (by 256
// bytes per iteration) instead of indexing with SI, because each value spans
// two quadwords.
TEXT ·MultiSum64Uint128(SB), NOSPLIT, $0-54
	MOVQ $PRIME1, prime1
	MOVQ $PRIME2, prime2
	MOVQ $PRIME3, prime3
	MOVQ $PRIME4, prime4
	MOVQ h_base+0(FP), AX
	MOVQ h_len+8(FP), CX
	MOVQ v_base+24(FP), BX
	MOVQ v_len+32(FP), DX
	// CX = min(len(h), len(v)); it is also the return value.
	CMPQ CX, DX
	CMOVQGT DX, CX
	MOVQ CX, ret+48(FP)
	XORQ SI, SI
	// Use the scalar loop for short inputs or when AVX-512 is unavailable.
	CMPQ CX, $16
	JB loop
	CMPB ·hasAVX512(SB), $0
	JE loop
	// DI = CX rounded down to a multiple of 16; the vectorized loop hashes
	// 16 values (2 groups of 8 lanes) per iteration.
	MOVQ CX, DI
	SHRQ $4, DI
	SHLQ $4, DI
	VMOVDQU64 prime1vec<>(SB), prime1ZMM
	VMOVDQU64 prime2vec<>(SB), prime2ZMM
	VMOVDQU64 prime3vec<>(SB), prime3ZMM
	VMOVDQU64 prime4vec<>(SB), prime4ZMM
	VMOVDQU64 prime5vec16<>(SB), Z6
	VMOVDQU64 vpermi2qeven<>(SB), Z7
	VMOVDQU64 vpermi2qodd<>(SB), Z8
loop16x64:
	// This algorithm is slightly different from the other ones, because it is
	// the only case where the input values are larger than the output (128 bits
	// vs 64 bits).
	//
	// Computing the XXH64 of 128 bits values requires doing two passes over the
	// lower and upper 64 bits. The lower and upper quad words are split in
	// separate vectors, the first pass is applied on the vector holding the
	// lower bits of 8 input values, then the second pass is applied with the
	// vector holding the upper bits.
	//
	// Following the model used in the other functions, we unroll the work of
	// two consecutive groups of 8 values per loop iteration in order to
	// maximize utilization of CPU resources.
	//
	// Note the bounds check is at the head of this loop; exiting jumps
	// directly to the scalar loop below.
	CMPQ SI, DI
	JE loop
	// Seed 2 groups of 8 accumulators with PRIME5+16 (PRIME5 + input length).
	VMOVDQA64 Z6, Z0
	VMOVDQA64 Z6, Z20
	VMOVDQU64 (BX), Z1
	VMOVDQU64 64(BX), Z9
	VMOVDQU64 128(BX), Z21
	VMOVDQU64 192(BX), Z29
	// Gather the lower (even) and upper (odd) quadwords of each 16-byte value
	// into separate vectors with VPERMI2Q.
	VMOVDQA64 Z7, Z2
	VMOVDQA64 Z8, Z3
	VMOVDQA64 Z7, Z22
	VMOVDQA64 Z8, Z23
	VPERMI2Q Z9, Z1, Z2
	VPERMI2Q Z9, Z1, Z3
	VPERMI2Q Z29, Z21, Z22
	VPERMI2Q Z29, Z21, Z23
	// Compute the rounds on inputs.
	VPXORQ Z4, Z4, Z4
	VPXORQ Z5, Z5, Z5
	VPXORQ Z24, Z24, Z24
	VPXORQ Z25, Z25, Z25
	round8x64(Z2, Z4)
	round8x64(Z3, Z5)
	round8x64(Z22, Z24)
	round8x64(Z23, Z25)
	// Lower 64 bits.
	VPXORQ Z4, Z0, Z0
	VPXORQ Z24, Z20, Z20
	VPROLQ $27, Z0, Z0
	VPROLQ $27, Z20, Z20
	VPMULLQ prime1ZMM, Z0, Z0
	VPMULLQ prime1ZMM, Z20, Z20
	VPADDQ prime4ZMM, Z0, Z0
	VPADDQ prime4ZMM, Z20, Z20
	// Upper 64 bits.
	VPXORQ Z5, Z0, Z0
	VPXORQ Z25, Z20, Z20
	VPROLQ $27, Z0, Z0
	VPROLQ $27, Z20, Z20
	VPMULLQ prime1ZMM, Z0, Z0
	VPMULLQ prime1ZMM, Z20, Z20
	VPADDQ prime4ZMM, Z0, Z0
	VPADDQ prime4ZMM, Z20, Z20
	// Final mix, then store 16 digests.
	avalanche8x64(Z1, Z0)
	avalanche8x64(Z21, Z20)
	VMOVDQU64 Z0, (AX)(SI*8)
	VMOVDQU64 Z20, 64(AX)(SI*8)
	ADDQ $256, BX
	ADDQ $16, SI
	JMP loop16x64
	// NOTE(review): this VZEROUPPER appears unreachable — the loop above
	// exits through the JE at its head, jumping straight to the scalar loop.
	// Consider moving it to the exit path to avoid AVX/SSE transition
	// penalties in the scalar tail; confirm before changing.
	VZEROUPPER
loop:
	// Scalar tail: one value per iteration; two 8-byte tail steps over the
	// low and high quadwords of the value, which BX points at directly.
	CMPQ SI, CX
	JE done
	MOVQ $PRIME5+16, R8
	MOVQ (BX), DX
	MOVQ 8(BX), DI
	XORQ R9, R9
	XORQ R10, R10
	round(DX, R9)
	round(DI, R10)
	XORQ R9, R8
	ROLQ $27, R8
	IMULQ prime1, R8
	ADDQ prime4, R8
	XORQ R10, R8
	ROLQ $27, R8
	IMULQ prime1, R8
	ADDQ prime4, R8
	avalanche(R9, R8)
	MOVQ R8, (AX)(SI*8)
	ADDQ $16, BX
	INCQ SI
	JMP loop
done:
	RET
================================================
FILE: bloom/xxhash/sum64uint_purego.go
================================================
//go:build purego || !amd64
package xxhash
// MultiSum64Uint8 computes the XXH64 digest of each value of v and stores the
// results at matching indexes in h. It returns the number of sums written,
// which is min(len(h), len(v)).
func MultiSum64Uint8(h []uint64, v []uint8) int {
	n := min(len(h), len(v))
	for i, x := range v[:n] {
		h[i] = Sum64Uint8(x)
	}
	return n
}
// MultiSum64Uint16 computes the XXH64 digest of each value of v and stores the
// results at matching indexes in h. It returns the number of sums written,
// which is min(len(h), len(v)).
func MultiSum64Uint16(h []uint64, v []uint16) int {
	n := min(len(h), len(v))
	for i, x := range v[:n] {
		h[i] = Sum64Uint16(x)
	}
	return n
}
// MultiSum64Uint32 computes the XXH64 digest of each value of v and stores the
// results at matching indexes in h. It returns the number of sums written,
// which is min(len(h), len(v)).
func MultiSum64Uint32(h []uint64, v []uint32) int {
	n := min(len(h), len(v))
	for i, x := range v[:n] {
		h[i] = Sum64Uint32(x)
	}
	return n
}
// MultiSum64Uint64 computes the XXH64 digest of each value of v and stores the
// results at matching indexes in h. It returns the number of sums written,
// which is min(len(h), len(v)).
func MultiSum64Uint64(h []uint64, v []uint64) int {
	n := min(len(h), len(v))
	for i, x := range v[:n] {
		h[i] = Sum64Uint64(x)
	}
	return n
}
// MultiSum64Uint128 computes the XXH64 digest of each 16-byte value of v and
// stores the results at matching indexes in h. It returns the number of sums
// written, which is min(len(h), len(v)).
func MultiSum64Uint128(h []uint64, v [][16]byte) int {
	n := min(len(h), len(v))
	for i := range v[:n] {
		h[i] = Sum64Uint128(v[i])
	}
	return n
}
// min returns the smaller of its two arguments.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
================================================
FILE: bloom/xxhash/sum64uint_test.go
================================================
package xxhash_test
import (
"encoding/binary"
"fmt"
"testing"
"testing/quick"
"time"
"github.com/segmentio/parquet-go/bloom/xxhash"
)
// TestSumUint8 verifies Sum64Uint8 against the reference Sum64 on a one-byte
// buffer holding the same value.
func TestSumUint8(t *testing.T) {
	want := xxhash.Sum64([]byte{42})
	if got := xxhash.Sum64Uint8(42); got != want {
		t.Errorf("got %064b; want %064b", got, want)
	}
}
// TestSumUint16 verifies Sum64Uint16 against the reference Sum64 on the
// two-byte little-endian encoding of the same value.
func TestSumUint16(t *testing.T) {
	var b [2]byte
	b[0] = 42
	want := xxhash.Sum64(b[:])
	if got := xxhash.Sum64Uint16(42); got != want {
		t.Errorf("got %064b; want %064b", got, want)
	}
}
// TestSumUint32 verifies Sum64Uint32 against the reference Sum64 on the
// four-byte little-endian encoding of the same value.
func TestSumUint32(t *testing.T) {
	var b [4]byte
	b[0] = 42
	want := xxhash.Sum64(b[:])
	if got := xxhash.Sum64Uint32(42); got != want {
		t.Errorf("got %064b; want %064b", got, want)
	}
}
// TestSumUint64 verifies Sum64Uint64 against the reference Sum64 on the
// eight-byte little-endian encoding of the same value.
func TestSumUint64(t *testing.T) {
	var b [8]byte
	b[0] = 42
	want := xxhash.Sum64(b[:])
	if got := xxhash.Sum64Uint64(42); got != want {
		t.Errorf("got %064b; want %064b", got, want)
	}
}
// TestSumUint128 verifies Sum64Uint128 against the reference Sum64 on the raw
// 16 bytes of the same value.
func TestSumUint128(t *testing.T) {
	var b [16]byte
	b[0] = 42
	want := xxhash.Sum64(b[:])
	if got := xxhash.Sum64Uint128(b); got != want {
		t.Errorf("got %064b; want %064b", got, want)
	}
}
// TestMultiSum64Uint8 cross-checks MultiSum64Uint8 against the reference
// Sum64 on randomly generated inputs.
func TestMultiSum64Uint8(t *testing.T) {
	check := func(v []uint8) bool {
		h := make([]uint64, len(v))
		if n := xxhash.MultiSum64Uint8(h, v); n != len(v) {
			t.Errorf("return value mismatch: got %d; want %d", n, len(v))
			return false
		}
		for i, sum := range h {
			if want := xxhash.Sum64(v[i : i+1]); sum != want {
				t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, sum, want)
				return false
			}
		}
		return true
	}
	if err := quick.Check(check, nil); err != nil {
		t.Error(err)
	}
}
// TestMultiSum64Uint16 cross-checks MultiSum64Uint16 against the reference
// Sum64 on the little-endian encoding of randomly generated inputs.
func TestMultiSum64Uint16(t *testing.T) {
	check := func(v []uint16) bool {
		h := make([]uint64, len(v))
		if n := xxhash.MultiSum64Uint16(h, v); n != len(v) {
			t.Errorf("return value mismatch: got %d; want %d", n, len(v))
			return false
		}
		var b [2]byte
		for i, sum := range h {
			binary.LittleEndian.PutUint16(b[:], v[i])
			if want := xxhash.Sum64(b[:]); sum != want {
				t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, sum, want)
				return false
			}
		}
		return true
	}
	if err := quick.Check(check, nil); err != nil {
		t.Error(err)
	}
}
// TestMultiSum64Uint32 cross-checks MultiSum64Uint32 against the reference
// Sum64 on the little-endian encoding of randomly generated inputs.
func TestMultiSum64Uint32(t *testing.T) {
	check := func(v []uint32) bool {
		h := make([]uint64, len(v))
		if n := xxhash.MultiSum64Uint32(h, v); n != len(v) {
			t.Errorf("return value mismatch: got %d; want %d", n, len(v))
			return false
		}
		var b [4]byte
		for i, sum := range h {
			binary.LittleEndian.PutUint32(b[:], v[i])
			if want := xxhash.Sum64(b[:]); sum != want {
				t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, sum, want)
				return false
			}
		}
		return true
	}
	if err := quick.Check(check, nil); err != nil {
		t.Error(err)
	}
}
// TestMultiSum64Uint64 cross-checks MultiSum64Uint64 against the reference
// Sum64 on the little-endian encoding of randomly generated inputs.
func TestMultiSum64Uint64(t *testing.T) {
	check := func(v []uint64) bool {
		h := make([]uint64, len(v))
		if n := xxhash.MultiSum64Uint64(h, v); n != len(v) {
			t.Errorf("return value mismatch: got %d; want %d", n, len(v))
			return false
		}
		var b [8]byte
		for i, sum := range h {
			binary.LittleEndian.PutUint64(b[:], v[i])
			if want := xxhash.Sum64(b[:]); sum != want {
				t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, sum, want)
				return false
			}
		}
		return true
	}
	if err := quick.Check(check, nil); err != nil {
		t.Error(err)
	}
}
// TestMultiSum64Uint128 cross-checks MultiSum64Uint128 against the reference
// Sum64 on the raw bytes of randomly generated inputs.
func TestMultiSum64Uint128(t *testing.T) {
	check := func(v [][16]byte) bool {
		h := make([]uint64, len(v))
		if n := xxhash.MultiSum64Uint128(h, v); n != len(v) {
			t.Errorf("return value mismatch: got %d; want %d", n, len(v))
			return false
		}
		for i, sum := range h {
			if want := xxhash.Sum64(v[i][:]); sum != want {
				t.Errorf("sum at index %d mismatch: got %064b; want %064b", i, sum, want)
				return false
			}
		}
		return true
	}
	if err := quick.Check(check, nil); err != nil {
		t.Error(err)
	}
}
// reportThroughput replaces the default ns/op metric with a hash/s metric
// computed from the number of loop iterations and hashes per iteration.
func reportThroughput(b *testing.B, loops, count int, start time.Time) {
	elapsed := time.Since(start).Seconds()
	hashesPerSecond := float64(loops*count) / elapsed
	// Measure the throughput of writes to the output buffer;
	// it makes the results comparable across benchmarks that
	// have inputs of different sizes.
	b.SetBytes(8 * int64(count))
	b.ReportMetric(0, "ns/op")
	b.ReportMetric(hashesPerSecond, "hash/s")
}
// benchmarkBufferSize is the size in bytes of the input buffer hashed by the
// BenchmarkMultiSum64* benchmarks.
const benchmarkBufferSize = 4096
// BenchmarkMultiSum64Uint8 measures the bulk hashing throughput of 1-byte
// values over a 4KB input buffer.
func BenchmarkMultiSum64Uint8(b *testing.B) {
	in := make([]uint8, benchmarkBufferSize)
	for i := 0; i < len(in); i++ {
		in[i] = uint8(i)
	}
	name := fmt.Sprintf("%dKB", benchmarkBufferSize/1024)
	b.Run(name, func(b *testing.B) {
		out := make([]uint64, len(in))
		start := time.Now()
		for n := 0; n < b.N; n++ {
			_ = xxhash.MultiSum64Uint8(out, in)
		}
		reportThroughput(b, b.N, len(out), start)
	})
}
// BenchmarkMultiSum64Uint16 measures the bulk hashing throughput of 2-byte
// values over a 4KB input buffer.
func BenchmarkMultiSum64Uint16(b *testing.B) {
	in := make([]uint16, benchmarkBufferSize/2)
	for i := 0; i < len(in); i++ {
		in[i] = uint16(i)
	}
	name := fmt.Sprintf("%dKB", benchmarkBufferSize/1024)
	b.Run(name, func(b *testing.B) {
		out := make([]uint64, len(in))
		start := time.Now()
		for n := 0; n < b.N; n++ {
			_ = xxhash.MultiSum64Uint16(out, in)
		}
		reportThroughput(b, b.N, len(out), start)
	})
}
// BenchmarkMultiSum64Uint32 measures the bulk hashing throughput of 4-byte
// values over a 4KB input buffer.
func BenchmarkMultiSum64Uint32(b *testing.B) {
	in := make([]uint32, benchmarkBufferSize/4)
	for i := 0; i < len(in); i++ {
		in[i] = uint32(i)
	}
	name := fmt.Sprintf("%dKB", benchmarkBufferSize/1024)
	b.Run(name, func(b *testing.B) {
		out := make([]uint64, len(in))
		start := time.Now()
		for n := 0; n < b.N; n++ {
			_ = xxhash.MultiSum64Uint32(out, in)
		}
		reportThroughput(b, b.N, len(out), start)
	})
}
// BenchmarkMultiSum64Uint64 measures the bulk hashing throughput of 8-byte
// values over a 4KB input buffer.
func BenchmarkMultiSum64Uint64(b *testing.B) {
	in := make([]uint64, benchmarkBufferSize/8)
	for i := 0; i < len(in); i++ {
		in[i] = uint64(i)
	}
	name := fmt.Sprintf("%dKB", benchmarkBufferSize/1024)
	b.Run(name, func(b *testing.B) {
		out := make([]uint64, len(in))
		start := time.Now()
		for n := 0; n < b.N; n++ {
			_ = xxhash.MultiSum64Uint64(out, in)
		}
		reportThroughput(b, b.N, len(out), start)
	})
}
// BenchmarkMultiSum64Uint128 measures the bulk hashing throughput of 16-byte
// values over a 4KB input buffer.
func BenchmarkMultiSum64Uint128(b *testing.B) {
	in := make([][16]byte, benchmarkBufferSize/16)
	for i := 0; i < len(in); i++ {
		binary.LittleEndian.PutUint64(in[i][:8], uint64(i))
		binary.LittleEndian.PutUint64(in[i][8:], uint64(i))
	}
	name := fmt.Sprintf("%dKB", benchmarkBufferSize/1024)
	b.Run(name, func(b *testing.B) {
		out := make([]uint64, len(in))
		start := time.Now()
		for n := 0; n < b.N; n++ {
			_ = xxhash.MultiSum64Uint128(out, in)
		}
		reportThroughput(b, b.N, len(out), start)
	})
}
================================================
FILE: bloom/xxhash/xxhash.go
================================================
// Package xxhash is an extension of github.com/cespare/xxhash which adds
// routines optimized to hash arrays of fixed size elements.
package xxhash
import (
"encoding/binary"
"math/bits"
)
// Prime constants of the XXH64 algorithm, matching the reference
// implementation.
const (
	prime1 uint64 = 0x9E3779B185EBCA87
	prime2 uint64 = 0xC2B2AE3D27D4EB4F
	prime3 uint64 = 0x165667B19E3779F9
	prime4 uint64 = 0x85EBCA77C2B2AE63
	prime5 uint64 = 0x27D4EB2F165667C5
	// Pre-computed operations because the compiler otherwise complains that the
	// results overflow 64 bit integers.
	prime1plus2 uint64 = 0x60EA27EEADC0B5D6 // prime1 + prime2
	negprime1   uint64 = 0x61C8864E7A143579 // -prime1
)
// avalanche applies the XXH64 finalization mix to h, dispersing its bits so
// that every input bit affects every output bit.
func avalanche(h uint64) uint64 {
	h = (h ^ (h >> 33)) * prime2
	h = (h ^ (h >> 29)) * prime3
	return h ^ (h >> 32)
}
func round(acc, input uint64) uint64 {
acc += input * prime2
acc = rol31(acc)
acc *= prime1
return acc
}
// mergeRound folds one of the four block accumulators into the final digest
// accumulator.
func mergeRound(acc, val uint64) uint64 {
	return (acc^round(0, val))*prime1 + prime4
}
// u64 and u32 read little-endian integers from b, the byte order XXH64
// consumes its input in.
func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }

// rolN returns x rotated left by N bits; these are the fixed rotations used
// by the XXH64 algorithm.
func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }
================================================
FILE: bloom/xxhash/xxhash_amd64.go
================================================
//go:build !purego
package xxhash
// Sum64 computes the 64-bit xxHash digest of b.
//
// The implementation is provided in assembly by xxhash_amd64.s.
func Sum64(b []byte) uint64
================================================
FILE: bloom/xxhash/xxhash_amd64.s
================================================
//go:build !purego
#include "textflag.h"
// Prime constants of the XXH64 algorithm, identical to the ones used by the
// reference implementation.
#define PRIME1 0x9E3779B185EBCA87
#define PRIME2 0xC2B2AE3D27D4EB4F
#define PRIME3 0x165667B19E3779F9
#define PRIME4 0x85EBCA77C2B2AE63
#define PRIME5 0x27D4EB2F165667C5
// PRIME3 and PRIME5 are loaded from memory where no register is free to hold
// them.
DATA prime3<>+0(SB)/8, $PRIME3
GLOBL prime3<>(SB), RODATA|NOPTR, $8
DATA prime5<>+0(SB)/8, $PRIME5
GLOBL prime5<>(SB), RODATA|NOPTR, $8
// Register allocation:
// AX h
// SI pointer to advance through b
// DX n
// BX loop end
// R8 v1, k1
// R9 v2
// R10 v3
// R11 v4
// R12 tmp
// R13 PRIME1
// R14 PRIME2
// DI PRIME4
// round reads from and advances the buffer pointer in SI.
// It assumes that R13 has PRIME1 and R14 has PRIME2.
#define round(r) \
	MOVQ  (SI), R12 \
	ADDQ  $8, SI \
	IMULQ R14, R12 \
	ADDQ  R12, r \
	ROLQ  $31, r \
	IMULQ R13, r
// mergeRound applies a merge round on the two registers acc and val.
// It assumes that R13 has PRIME1, R14 has PRIME2, and DI has PRIME4.
#define mergeRound(acc, val) \
	IMULQ R14, val \
	ROLQ  $31, val \
	IMULQ R13, val \
	XORQ  val, acc \
	IMULQ R13, acc \
	ADDQ  DI, acc
// func Sum64(b []byte) uint64
//
// Computes the XXH64 digest of b: 32-byte blocks through four accumulators,
// then the 8/4/1-byte tail steps, then the final avalanche.
TEXT ·Sum64(SB), NOSPLIT, $0-32
	// Load fixed primes.
	MOVQ $PRIME1, R13
	MOVQ $PRIME2, R14
	MOVQ $PRIME4, DI
	// Load slice.
	MOVQ b_base+0(FP), SI
	MOVQ b_len+8(FP), DX
	LEAQ (SI)(DX*1), BX
	// The first loop limit will be len(b)-32.
	SUBQ $32, BX
	// Check whether we have at least one block.
	CMPQ DX, $32
	JLT noBlocks
	// Set up initial state (v1, v2, v3, v4).
	MOVQ R13, R8
	ADDQ R14, R8
	MOVQ R14, R9
	XORQ R10, R10
	XORQ R11, R11
	SUBQ R13, R11
	// Loop until SI > BX.
blockLoop:
	round(R8)
	round(R9)
	round(R10)
	round(R11)
	CMPQ SI, BX
	JLE blockLoop
	// Combine the four accumulators: h = rol1(v1)+rol7(v2)+rol12(v3)+rol18(v4),
	// then merge each accumulator into h.
	MOVQ R8, AX
	ROLQ $1, AX
	MOVQ R9, R12
	ROLQ $7, R12
	ADDQ R12, AX
	MOVQ R10, R12
	ROLQ $12, R12
	ADDQ R12, AX
	MOVQ R11, R12
	ROLQ $18, R12
	ADDQ R12, AX
	mergeRound(AX, R8)
	mergeRound(AX, R9)
	mergeRound(AX, R10)
	mergeRound(AX, R11)
	JMP afterBlocks
noBlocks:
	// Inputs shorter than 32 bytes seed the accumulator with PRIME5.
	MOVQ $PRIME5, AX
afterBlocks:
	// h += len(b), then fold the remaining 0-31 bytes.
	ADDQ DX, AX
	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
	ADDQ $24, BX
	CMPQ SI, BX
	JG fourByte
wordLoop:
	// Calculate k1.
	MOVQ (SI), R8
	ADDQ $8, SI
	IMULQ R14, R8
	ROLQ $31, R8
	IMULQ R13, R8
	XORQ R8, AX
	ROLQ $27, AX
	IMULQ R13, AX
	ADDQ DI, AX
	CMPQ SI, BX
	JLE wordLoop
fourByte:
	// One optional 4-byte step: h = rol23(h ^ u32*PRIME1)*PRIME2 + PRIME3.
	ADDQ $4, BX
	CMPQ SI, BX
	JG singles
	MOVL (SI), R8
	ADDQ $4, SI
	IMULQ R13, R8
	XORQ R8, AX
	ROLQ $23, AX
	IMULQ R14, AX
	ADDQ prime3<>(SB), AX
singles:
	// Remaining bytes one at a time: h = rol11(h ^ byte*PRIME5)*PRIME1.
	ADDQ $4, BX
	CMPQ SI, BX
	JGE finalize
singlesLoop:
	MOVBQZX (SI), R12
	ADDQ $1, SI
	IMULQ prime5<>(SB), R12
	XORQ R12, AX
	ROLQ $11, AX
	IMULQ R13, AX
	CMPQ SI, BX
	JL singlesLoop
finalize:
	// Final avalanche mix of the accumulator.
	MOVQ AX, R12
	SHRQ $33, R12
	XORQ R12, AX
	IMULQ R14, AX
	MOVQ AX, R12
	SHRQ $29, R12
	XORQ R12, AX
	IMULQ prime3<>(SB), AX
	MOVQ AX, R12
	SHRQ $32, R12
	XORQ R12, AX
	MOVQ AX, ret+24(FP)
	RET
================================================
FILE: bloom/xxhash/xxhash_purego.go
================================================
//go:build purego || !amd64
package xxhash
// Sum64 computes the 64-bit xxHash digest of b.
func Sum64(b []byte) uint64 {
var n = len(b)
var h uint64
if n >= 32 {
v1 := prime1plus2
v2 := prime2
v3 := uint64(0)
v4 := negprime1
for len(b) >= 32 {
v1 = round(v1, u64(b[0:8:len(b)]))
v2 = round(v2, u64(b[8:16:len(b)]))
v3 = round(v3, u64(b[16:24:len(b)]))
v4 = round(v4, u64(b[24:32:len(b)]))
b = b[32:len(b):len(b)]
}
h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
h = mergeRound(h, v1)
h = mergeRound(h, v2)
h = mergeRound(h, v3)
h = mergeRound(h, v4)
} else {
h = prime5
}
h += uint64(n)
i, end := 0, len(b)
for ; i+8 <= end; i += 8 {
k1 := round(0, u64(b[i:i+8:len(b)]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
if i+4 <= end {
h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
h = rol23(h)*prime2 + prime3
i += 4
}
for ; i < end; i++ {
h ^= uint64(b[i]) * prime5
h = rol11(h) * prime1
}
return avalanche(h)
}
================================================
FILE: bloom/xxhash/xxhash_test.go
================================================
package xxhash_test
import (
"testing"
"github.com/segmentio/parquet-go/bloom/xxhash"
)
// TestSum64 verifies Sum64 against known digests, covering the empty input,
// the 1/2/3/4-byte tails, and an input long enough to exercise all code paths.
func TestSum64(t *testing.T) {
	tests := []struct {
		name  string
		input string
		want  uint64
	}{
		{"empty", "", 0xef46db3751d8e999},
		{"a", "a", 0xd24ec4f1a98c6e5b},
		{"as", "as", 0x1c330fb2d66be179},
		{"asd", "asd", 0x631c37ce72a97393},
		{"asdf", "asdf", 0x415872f599cea71e},
		{
			"len=63",
			// Exactly 63 characters, which exercises all code paths.
			"Call me Ishmael. Some years ago--never mind how long precisely-",
			0x02a2e85470d6fd96,
		},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got := xxhash.Sum64([]byte(tc.input))
			if got != tc.want {
				t.Fatalf("Sum64: got 0x%x; want 0x%x", got, tc.want)
			}
		})
	}
}
// benchmarks lists the input sizes exercised by BenchmarkSum64.
var benchmarks = []struct {
	name string
	n    int64
}{
	{"4B", 4},
	{"16B", 16},
	{"100B", 100},
	{"4KB", 4e3},
	{"10MB", 10e6},
}
// BenchmarkSum64 measures Sum64 throughput across the input sizes declared in
// the benchmarks table.
func BenchmarkSum64(b *testing.B) {
	for _, bb := range benchmarks {
		in := make([]byte, bb.n)
		for i := 0; i < len(in); i++ {
			in[i] = byte(i)
		}
		b.Run(bb.name, func(b *testing.B) {
			b.SetBytes(bb.n)
			for n := 0; n < b.N; n++ {
				_ = xxhash.Sum64(in)
			}
		})
	}
}
================================================
FILE: bloom.go
================================================
package parquet
import (
"io"
"github.com/segmentio/parquet-go/bloom"
"github.com/segmentio/parquet-go/bloom/xxhash"
"github.com/segmentio/parquet-go/deprecated"
"github.com/segmentio/parquet-go/encoding"
"github.com/segmentio/parquet-go/format"
"github.com/segmentio/parquet-go/internal/unsafecast"
)
// BloomFilter is an interface allowing applications to test whether a key
// exists in a bloom filter.
type BloomFilter interface {
// Implement the io.ReaderAt interface as a mechanism to allow reading the
// raw bits of the filter.
io.ReaderAt
// Returns the size of the bloom filter (in bytes).
Size() int64
// Tests whether the given value is present in the filter.
//
// A non-nil error may be returned if reading the filter failed. This may
// happen if the filter was lazily loaded from a storage medium during the
// call to Check for example. Applications that can guarantee that the
// filter was in memory at the time Check was called can safely ignore the
// error, which would always be nil in this case.
Check(value Value) (bool, error)
}
// bloomFilter is the BloomFilter implementation backed by a section of an
// underlying file; the filter bits are read through the embedded
// SectionReader when Check is called.
type bloomFilter struct {
	io.SectionReader
	// hash computes the 64-bit digests of values tested against the filter.
	hash bloom.Hash
	// check probes the filter bits read from the reader for a hash.
	check func(io.ReaderAt, int64, uint64) (bool, error)
}
// Check tests whether the hash of v may be present in the filter, reading the
// filter bits through the embedded SectionReader.
func (f *bloomFilter) Check(v Value) (bool, error) {
	return f.check(&f.SectionReader, f.Size(), v.hash(f.hash))
}
// hash computes the bloom filter hash of the value, dispatching on its kind
// to the fixed-size hashing routine matching the physical type: booleans hash
// one byte, Int32/Float hash four bytes, Int64/Double hash eight bytes, and
// every other kind hashes the raw byte content of the value.
func (v Value) hash(h bloom.Hash) uint64 {
	switch v.Kind() {
	case Boolean:
		return h.Sum64Uint8(v.byte())
	case Int32, Float:
		return h.Sum64Uint32(v.uint32())
	case Int64, Double:
		return h.Sum64Uint64(v.uint64())
	default: // Int96, ByteArray, FixedLenByteArray, or null
		return h.Sum64(v.byteArray())
	}
}
// newBloomFilter constructs a bloomFilter reading its bits from the given
// section of file, as described by the header. It returns nil for any filter
// configuration other than the only supported combination: split-block
// algorithm, XXH64 hashing, and no compression.
func newBloomFilter(file io.ReaderAt, offset int64, header *format.BloomFilterHeader) *bloomFilter {
	if header.Algorithm.Block == nil ||
		header.Hash.XxHash == nil ||
		header.Compression.Uncompressed == nil {
		return nil
	}
	return &bloomFilter{
		SectionReader: *io.NewSectionReader(file, offset, int64(header.NumBytes)),
		hash:          bloom.XXH64{},
		check:         bloom.CheckSplitBlock,
	}
}
// The BloomFilterColumn interface is a declarative representation of bloom filters
// used when configuring filters on a parquet writer.
type BloomFilterColumn interface {
// Returns the path of the column that the filter applies to.
Path() []string
// Returns the hashing algorithm used when inserting values into a bloom
// filter.
Hash() bloom.Hash
// Returns an encoding which can be used to write columns of values to the
// filter.
Encoding() encoding.Encoding
// Returns the size of the filter needed to encode values in the filter,
// assuming each value will be encoded with the given number of bits.
Size(numValues int64) int
}
// SplitBlockFilter constructs a split block bloom filter object for the column
// at the given path, with the given bitsPerValue.
//
// If you are unsure what number of bitsPerValue to use, 10 is a reasonable
// tradeoff between size and error rate for common datasets.
//
// For more information on the tradeoff between size and error rate, consult
// this website: https://hur.st/bloomfilter/?n=4000&p=0.1&m=&k=1
func SplitBlockFilter(bitsPerValue uint, path ...string) BloomFilterColumn {
	return splitBlockFilter{bitsPerValue: bitsPerValue, path: path}
}
// splitBlockFilter is the BloomFilterColumn implementation returned by
// SplitBlockFilter, describing a split-block bloom filter with XXH64 hashing
// for the column at the given path.
type splitBlockFilter struct {
	bitsPerValue uint
	path         []string
}

func (f splitBlockFilter) Path() []string { return f.path }
func (f splitBlockFilter) Hash() bloom.Hash { return bloom.XXH64{} }
func (f splitBlockFilter) Encoding() encoding.Encoding { return splitBlockEncoding{} }

// Size returns the number of bytes needed to hold a split-block filter for
// numValues values at the configured bits-per-value density.
func (f splitBlockFilter) Size(numValues int64) int {
	return bloom.BlockSize * bloom.NumSplitBlocksOf(numValues, f.bitsPerValue)
}
// bloomFilterHeader creates the thrift header describing the given bloom
// filter column.
//
// For now there is only one type of filter supported, but we provide this
// function to suggest a model for extending the implementation if new filters
// are added to the parquet specs.
func bloomFilterHeader(filter BloomFilterColumn) (header format.BloomFilterHeader) {
	if _, isSplitBlock := filter.(splitBlockFilter); isSplitBlock {
		header.Algorithm.Block = &format.SplitBlockAlgorithm{}
	}
	if _, isXXH64 := filter.Hash().(bloom.XXH64); isXXH64 {
		header.Hash.XxHash = &format.XxHash{}
	}
	header.Compression.Uncompressed = &format.BloomFilterUncompressed{}
	return header
}
// searchBloomFilterColumn returns the first filter whose path equals the
// given column path, or nil if no filter matches.
func searchBloomFilterColumn(filters []BloomFilterColumn, path columnPath) BloomFilterColumn {
	for i := range filters {
		if path.equal(filters[i].Path()) {
			return filters[i]
		}
	}
	return nil
}
const (
	// Size of the stack buffer used to perform bulk operations on bloom filters.
	//
	// This value was determined as being a good default empirically,
	// 128 x uint64 makes a 1KiB buffer which amortizes the cost of calling
	// methods of bloom filters while not causing too much stack growth either.
	filterEncodeBufferSize = 128
)
// splitBlockEncoding is an encoding.Encoding implementation which hashes
// values and inserts them into a split-block bloom filter instead of
// producing a regular encoded page; all unused methods are inherited from
// encoding.NotSupported.
type splitBlockEncoding struct {
	encoding.NotSupported
}
// EncodeBoolean hashes the boolean values of src and inserts them in the
// split-block bloom filter laid out in dst.
func (splitBlockEncoding) EncodeBoolean(dst []byte, src []byte) ([]byte, error) {
	filter := bloom.MakeSplitBlockFilter(dst)
	splitBlockEncodeUint8(filter, src)
	return dst, nil
}
// EncodeInt32 hashes the 32 bits integer values of src and inserts them in
// the split-block bloom filter laid out in dst.
func (splitBlockEncoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
	filter := bloom.MakeSplitBlockFilter(dst)
	splitBlockEncodeUint32(filter, unsafecast.Int32ToUint32(src))
	return dst, nil
}
// EncodeInt64 hashes the 64 bits integer values of src and inserts them in
// the split-block bloom filter laid out in dst.
func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
	filter := bloom.MakeSplitBlockFilter(dst)
	splitBlockEncodeUint64(filter, unsafecast.Int64ToUint64(src))
	return dst, nil
}
// EncodeInt96 hashes the 96 bits integer values of src and inserts them in
// the split-block bloom filter laid out in dst. Each value is hashed from
// its 12-byte little-endian representation.
//
// The receiver name was removed because it was unused, for consistency with
// the other Encode* methods of splitBlockEncoding.
func (splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]byte, error) {
	splitBlockEncodeFixedLenByteArray(bloom.MakeSplitBlockFilter(dst), deprecated.Int96ToBytes(src), 12)
	return dst, nil
}
// EncodeFloat hashes the 32 bits floating point values of src and inserts
// them in the split-block bloom filter laid out in dst.
func (splitBlockEncoding) EncodeFloat(dst []byte, src []float32) ([]byte, error) {
	filter := bloom.MakeSplitBlockFilter(dst)
	splitBlockEncodeUint32(filter, unsafecast.Float32ToUint32(src))
	return dst, nil
}
// EncodeDouble hashes the 64 bits floating point values of src and inserts
// them in the split-block bloom filter laid out in dst.
func (splitBlockEncoding) EncodeDouble(dst []byte, src []float64) ([]byte, error) {
	filter := bloom.MakeSplitBlockFilter(dst)
	splitBlockEncodeUint64(filter, unsafecast.Float64ToUint64(src))
	return dst, nil
}
// EncodeByteArray hashes the variable-length values of src, delimited by
// offsets, and inserts them in the split-block bloom filter laid out in dst.
//
// offsets holds n+1 boundaries for n values: value i spans
// src[offsets[i]:offsets[i+1]]. Hashes are batched in a small stack buffer
// to amortize the cost of InsertBulk calls.
func (splitBlockEncoding) EncodeByteArray(dst []byte, src []byte, offsets []uint32) ([]byte, error) {
	// An empty offsets slice encodes zero values; guard it explicitly since
	// the offsets[0] access below would otherwise panic.
	if len(offsets) == 0 {
		return dst, nil
	}
	filter := bloom.MakeSplitBlockFilter(dst)
	buffer := make([]uint64, 0, filterEncodeBufferSize)
	baseOffset := offsets[0]
	for _, endOffset := range offsets[1:] {
		// Three-index slicing caps the value so it cannot alias past its end.
		value := src[baseOffset:endOffset:endOffset]
		baseOffset = endOffset
		if len(buffer) == cap(buffer) {
			filter.InsertBulk(buffer)
			buffer = buffer[:0]
		}
		buffer = append(buffer, xxhash.Sum64(value))
	}
	filter.InsertBulk(buffer)
	return dst, nil
}
// EncodeFixedLenByteArray hashes the fixed-length values of src and inserts
// them in the split-block bloom filter laid out in dst. The 16-byte case has
// a dedicated vectorized path.
func (splitBlockEncoding) EncodeFixedLenByteArray(dst []byte, src []byte, size int) ([]byte, error) {
	filter := bloom.MakeSplitBlockFilter(dst)
	switch size {
	case 16:
		splitBlockEncodeUint128(filter, unsafecast.BytesToUint128(src))
	default:
		splitBlockEncodeFixedLenByteArray(filter, src, size)
	}
	return dst, nil
}
// splitBlockEncodeFixedLenByteArray hashes each size-byte chunk of data and
// inserts the hashes into filter, batching them through a small stack buffer
// to amortize InsertBulk calls.
func splitBlockEncodeFixedLenByteArray(filter bloom.SplitBlockFilter, data []byte, size int) {
	// A non-positive size would never advance the loop indices below, making
	// the loop spin forever; there are no values to hash in that case.
	if size <= 0 {
		return
	}
	buffer := make([]uint64, 0, filterEncodeBufferSize)
	for i, j := 0, size; j <= len(data); {
		if len(buffer) == cap(buffer) {
			filter.InsertBulk(buffer)
			buffer = buffer[:0]
		}
		buffer = append(buffer, xxhash.Sum64(data[i:j]))
		i += size
		j += size
	}
	filter.InsertBulk(buffer)
}
// splitBlockEncodeUint8 hashes values in fixed-size batches and inserts the
// hashes into filter.
func splitBlockEncodeUint8(filter bloom.SplitBlockFilter, values []uint8) {
	hashes := make([]uint64, filterEncodeBufferSize)
	for offset := 0; offset < len(values); {
		n := xxhash.MultiSum64Uint8(hashes, values[offset:])
		filter.InsertBulk(hashes[:n])
		offset += n
	}
}
// splitBlockEncodeUint32 hashes values in fixed-size batches and inserts the
// hashes into filter.
func splitBlockEncodeUint32(filter bloom.SplitBlockFilter, values []uint32) {
	hashes := make([]uint64, filterEncodeBufferSize)
	for offset := 0; offset < len(values); {
		n := xxhash.MultiSum64Uint32(hashes, values[offset:])
		filter.InsertBulk(hashes[:n])
		offset += n
	}
}
// splitBlockEncodeUint64 hashes values in fixed-size batches and inserts the
// hashes into filter.
func splitBlockEncodeUint64(filter bloom.SplitBlockFilter, values []uint64) {
	hashes := make([]uint64, filterEncodeBufferSize)
	for offset := 0; offset < len(values); {
		n := xxhash.MultiSum64Uint64(hashes, values[offset:])
		filter.InsertBulk(hashes[:n])
		offset += n
	}
}
// splitBlockEncodeUint128 hashes 16-byte values in fixed-size batches and
// inserts the hashes into filter.
func splitBlockEncodeUint128(filter bloom.SplitBlockFilter, values [][16]byte) {
	hashes := make([]uint64, filterEncodeBufferSize)
	for offset := 0; offset < len(values); {
		n := xxhash.MultiSum64Uint128(hashes, values[offset:])
		filter.InsertBulk(hashes[:n])
		offset += n
	}
}
================================================
FILE: bloom_test.go
================================================
package parquet
import (
"math/rand"
"testing"
"github.com/segmentio/parquet-go/bloom"
"github.com/segmentio/parquet-go/deprecated"
"github.com/segmentio/parquet-go/internal/quick"
"github.com/segmentio/parquet-go/internal/unsafecast"
)
// TestSplitBlockFilter checks, for every physical type, that values encoded
// into a split-block bloom filter are always reported as present by the
// filter (bloom filters may report false positives but never false
// negatives).
func TestSplitBlockFilter(t *testing.T) {
	newFilter := func(numValues int) bloom.SplitBlockFilter {
		return make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(int64(numValues), 11))
	}

	enc := SplitBlockFilter(10, "$").Encoding()

	check := func(filter bloom.SplitBlockFilter, value Value) bool {
		return filter.Check(value.hash(&bloom.XXH64{}))
	}

	tests := []struct {
		scenario string
		function interface{}
	}{
		{
			scenario: "BOOLEAN",
			function: func(values []bool) bool {
				filter := newFilter(len(values))
				enc.EncodeBoolean(filter.Bytes(), unsafecast.BoolToBytes(values))
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "INT32",
			function: func(values []int32) bool {
				filter := newFilter(len(values))
				enc.EncodeInt32(filter.Bytes(), values)
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "INT64",
			function: func(values []int64) bool {
				filter := newFilter(len(values))
				enc.EncodeInt64(filter.Bytes(), values)
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "INT96",
			function: func(values []deprecated.Int96) bool {
				filter := newFilter(len(values))
				enc.EncodeInt96(filter.Bytes(), values)
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "FLOAT",
			function: func(values []float32) bool {
				filter := newFilter(len(values))
				enc.EncodeFloat(filter.Bytes(), values)
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "DOUBLE",
			function: func(values []float64) bool {
				filter := newFilter(len(values))
				enc.EncodeDouble(filter.Bytes(), values)
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "BYTE_ARRAY",
			function: func(values [][]byte) bool {
				content := make([]byte, 0, 512)
				// The offsets slice must start empty: the previous version
				// used make([]uint32, len(values)), which prepended
				// len(values) zero offsets and inserted spurious hashes of
				// empty byte slices into the filter.
				offsets := make([]uint32, 0, len(values)+1)
				for _, value := range values {
					offsets = append(offsets, uint32(len(content)))
					content = append(content, value...)
				}
				offsets = append(offsets, uint32(len(content)))
				filter := newFilter(len(values))
				enc.EncodeByteArray(filter.Bytes(), content, offsets)
				for _, v := range values {
					if !check(filter, ValueOf(v)) {
						return false
					}
				}
				return true
			},
		},

		{
			scenario: "FIXED_LEN_BYTE_ARRAY",
			function: func(values []byte) bool {
				filter := newFilter(len(values))
				enc.EncodeFixedLenByteArray(filter.Bytes(), values, 1)
				for _, v := range values {
					if !check(filter, ValueOf([1]byte{v})) {
						return false
					}
				}
				return true
			},
		},
	}

	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) {
			if err := quick.Check(test.function); err != nil {
				t.Error(err)
			}
		})
	}
}
// BenchmarkSplitBlockFilter measures the throughput of encoding a fixed
// batch of int64 values into a split-block bloom filter.
func BenchmarkSplitBlockFilter(b *testing.B) {
	const numValues = 1000
	filter := make(bloom.SplitBlockFilter, bloom.NumSplitBlocksOf(numValues, 10)).Bytes()
	enc := SplitBlockFilter(10, "$").Encoding()
	values := make([]int64, numValues)
	src := rand.NewSource(10)
	for i := range values {
		values[i] = src.Int63()
	}
	b.SetBytes(8 * numValues)
	for i := 0; i < b.N; i++ {
		enc.EncodeInt64(filter, values)
	}
}
================================================
FILE: buffer.go
================================================
package parquet
import (
"log"
"runtime"
"sort"
"sync"
"sync/atomic"
"github.com/segmentio/parquet-go/internal/debug"
)
// Buffer represents an in-memory group of parquet rows.
//
// The main purpose of the Buffer type is to provide a way to sort rows before
// writing them to a parquet file. Buffer implements sort.Interface as a way
// to support reordering the rows that have been written to it.
type Buffer struct {
	config  *RowGroupConfig
	schema  *Schema
	rowbuf  []Row           // scratch row used by Write to deconstruct values
	colbuf  [][]Value       // per-column scratch used by WriteRows to dispatch values
	chunks  []ColumnChunk   // read-only views of columns, same order and length
	columns []ColumnBuffer  // one buffer per leaf column of the schema
	sorted  []ColumnBuffer  // columns used for ordering, indexed by sorting column
}
// NewBuffer constructs a new buffer, using the given list of buffer options
// to configure the buffer returned by the function.
//
// The function panics if the buffer configuration is invalid. Programs that
// cannot guarantee the validity of the options passed to NewBuffer should
// construct the buffer configuration independently prior to calling this
// function:
//
//	config, err := parquet.NewRowGroupConfig(options...)
//	if err != nil {
//		// handle the configuration error
//		...
//	} else {
//		// this call to create a buffer is guaranteed not to panic
//		buffer := parquet.NewBuffer(config)
//		...
//	}
func NewBuffer(options ...RowGroupOption) *Buffer {
	config, err := NewRowGroupConfig(options...)
	if err != nil {
		panic(err)
	}
	buf := &Buffer{config: config}
	// When no schema was passed in the options, configuration is deferred
	// until the first row is written.
	if schema := config.Schema; schema != nil {
		buf.configure(schema)
	}
	return buf
}
// configure initializes the column buffers of buf for the leaf columns of
// schema, wiring up dictionaries, optional/repeated wrappers, and the sorted
// column views used by sort.Interface. It is a no-op when schema is nil.
func (buf *Buffer) configure(schema *Schema) {
	if schema == nil {
		return
	}
	sortingColumns := buf.config.Sorting.SortingColumns
	buf.sorted = make([]ColumnBuffer, len(sortingColumns))

	forEachLeafColumnOf(schema, func(leaf leafColumn) {
		nullOrdering := nullsGoLast
		columnIndex := int(leaf.columnIndex)
		columnType := leaf.node.Type()
		bufferCap := buf.config.ColumnBufferCapacity
		dictionary := (Dictionary)(nil)
		encoding := encodingOf(leaf.node)

		// Columns using a dictionary encoding buffer their values through an
		// indexed type; the dictionary owns the value storage.
		if isDictionaryEncoding(encoding) {
			estimatedDictBufferSize := columnType.EstimateSize(bufferCap)
			dictBuffer := columnType.NewValues(
				make([]byte, 0, estimatedDictBufferSize),
				nil,
			)
			dictionary = columnType.NewDictionary(columnIndex, 0, dictBuffer)
			columnType = dictionary.Type()
		}

		// searchSortingColumn returns len(sortingColumns) when the leaf is
		// not a sorting column.
		sortingIndex := searchSortingColumn(sortingColumns, leaf.path)
		if sortingIndex < len(sortingColumns) && sortingColumns[sortingIndex].NullsFirst() {
			nullOrdering = nullsGoFirst
		}

		column := columnType.NewColumnBuffer(columnIndex, bufferCap)
		// Wrap the plain buffer to track repetition/definition levels when
		// the column is repeated or optional.
		switch {
		case leaf.maxRepetitionLevel > 0:
			column = newRepeatedColumnBuffer(column, leaf.maxRepetitionLevel, leaf.maxDefinitionLevel, nullOrdering)
		case leaf.maxDefinitionLevel > 0:
			column = newOptionalColumnBuffer(column, leaf.maxDefinitionLevel, nullOrdering)
		}
		buf.columns = append(buf.columns, column)

		if sortingIndex < len(sortingColumns) {
			// Descending sort columns are viewed through an order-reversing
			// wrapper; buf.sorted is indexed by sorting column position.
			if sortingColumns[sortingIndex].Descending() {
				column = &reversedColumnBuffer{column}
			}
			buf.sorted[sortingIndex] = column
		}
	})

	buf.schema = schema
	buf.rowbuf = make([]Row, 0, 1)
	buf.colbuf = make([][]Value, len(buf.columns))
	buf.chunks = make([]ColumnChunk, len(buf.columns))

	// Each column buffer doubles as its own read-only ColumnChunk view.
	for i, column := range buf.columns {
		buf.chunks[i] = column
	}
}
// Size returns the estimated size of the buffer in memory (in bytes).
func (buf *Buffer) Size() int64 {
	var total int64
	for _, col := range buf.columns {
		total += col.Size()
	}
	return total
}
// NumRows returns the number of rows written to the buffer.
func (buf *Buffer) NumRows() int64 { return int64(buf.Len()) }

// ColumnChunks returns the buffer columns.
func (buf *Buffer) ColumnChunks() []ColumnChunk { return buf.chunks }

// ColumnBuffers returns the buffer columns.
//
// This method is similar to ColumnChunks, but returns a list of ColumnBuffer
// instead of a ColumnChunk values (the latter being read-only); calling
// ColumnBuffers or ColumnChunks with the same index returns the same underlying
// objects, but with different types, which removes the need for making a type
// assertion if the program needed to write directly to the column buffers.
// The presence of the ColumnChunks method is still required to satisfy the
// RowGroup interface.
func (buf *Buffer) ColumnBuffers() []ColumnBuffer { return buf.columns }

// Schema returns the schema of the buffer.
//
// The schema is either configured by passing a Schema in the option list when
// constructing the buffer, or lazily discovered when the first row is written.
func (buf *Buffer) Schema() *Schema { return buf.schema }

// SortingColumns returns the list of columns by which the buffer will be
// sorted.
//
// The sorting order is configured by passing a SortingColumns option when
// constructing the buffer.
func (buf *Buffer) SortingColumns() []SortingColumn { return buf.config.Sorting.SortingColumns }
// Len returns the number of rows written to the buffer.
func (buf *Buffer) Len() int {
	if len(buf.columns) == 0 {
		return 0
	}
	// All columns have the same number of rows, reading the first is enough.
	return buf.columns[0].Len()
}
// Less returns true if row[i] < row[j] in the buffer, comparing sorting
// columns in order and falling through to the next column on ties.
func (buf *Buffer) Less(i, j int) bool {
	for _, col := range buf.sorted {
		if col.Less(i, j) {
			return true
		}
		if col.Less(j, i) {
			return false
		}
		// Rows are equal on this column; compare the next one.
	}
	return false
}
// Swap exchanges the rows at indexes i and j.
func (buf *Buffer) Swap(i, j int) {
	for k := range buf.columns {
		buf.columns[k].Swap(i, j)
	}
}
// Reset clears the content of the buffer, allowing it to be reused.
func (buf *Buffer) Reset() {
	for k := range buf.columns {
		buf.columns[k].Reset()
	}
}
// Write writes a row held in a Go value to the buffer. The schema is lazily
// derived from the first row when it was not configured up front.
func (buf *Buffer) Write(row interface{}) error {
	if buf.schema == nil {
		buf.configure(SchemaOf(row))
	}

	rowbuf := buf.rowbuf[:1]
	buf.rowbuf = rowbuf
	defer clearRows(rowbuf)

	rowbuf[0] = buf.schema.Deconstruct(rowbuf[0], row)
	_, err := buf.WriteRows(rowbuf)
	return err
}
// WriteRows writes parquet rows to the buffer, dispatching the values of
// each row to their destination column buffers.
func (buf *Buffer) WriteRows(rows []Row) (int, error) {
	// Always drop references to the values and reset the per-column scratch
	// buffers, even on error.
	defer func() {
		for i := range buf.colbuf {
			clearValues(buf.colbuf[i])
			buf.colbuf[i] = buf.colbuf[i][:0]
		}
	}()

	if buf.schema == nil {
		return 0, ErrRowGroupSchemaMissing
	}

	// Group values by column so each column buffer receives a single batch.
	for i := range rows {
		for j := range rows[i] {
			c := rows[i][j].Column()
			buf.colbuf[c] = append(buf.colbuf[c], rows[i][j])
		}
	}

	for i, values := range buf.colbuf {
		if _, err := buf.columns[i].WriteValues(values); err != nil {
			// TODO: an error at this stage will leave the buffer in an invalid
			// state since the row was partially written. Applications are not
			// expected to continue using the buffer after getting an error,
			// maybe we can enforce it?
			return 0, err
		}
	}

	return len(rows), nil
}
// WriteRowGroup satisfies the RowGroupWriter interface: it appends all rows
// of rowGroup to the buffer, returning the number of rows copied.
func (buf *Buffer) WriteRowGroup(rowGroup RowGroup) (int64, error) {
	rowGroupSchema := rowGroup.Schema()
	if rowGroupSchema == nil {
		return 0, ErrRowGroupSchemaMissing
	}
	if buf.schema == nil {
		buf.configure(rowGroupSchema)
	} else if !nodesAreEqual(buf.schema, rowGroupSchema) {
		return 0, ErrRowGroupSchemaMismatch
	}
	if !sortingColumnsHavePrefix(rowGroup.SortingColumns(), buf.SortingColumns()) {
		return 0, ErrRowGroupSortingColumnsMismatch
	}

	numRowsBefore := buf.NumRows()
	rows := rowGroup.Rows()
	defer rows.Close()
	_, err := CopyRows(bufferWriter{buf}, rows)
	return buf.NumRows() - numRowsBefore, err
}
// Rows returns a reader exposing the current content of the buffer.
//
// The buffer and the returned reader share memory. Mutating the buffer
// concurrently to reading rows may result in non-deterministic behavior.
func (buf *Buffer) Rows() Rows { return newRowGroupRows(buf, ReadModeSync) }
// bufferWriter is an adapter for Buffer which implements both RowWriter and
// PageWriter to enable optimizations in CopyRows for types that support writing
// rows by copying whole pages instead of calling WriteRow repeatedly.
type bufferWriter struct{ buf *Buffer }

// WriteRows forwards to the underlying buffer.
func (w bufferWriter) WriteRows(rows []Row) (int, error) {
	return w.buf.WriteRows(rows)
}

// WriteValues writes values directly to the column buffer they belong to;
// all values are assumed to target the same column as values[0].
func (w bufferWriter) WriteValues(values []Value) (int, error) {
	return w.buf.columns[values[0].Column()].WriteValues(values)
}

// WritePage copies an entire page of values into the matching column buffer.
func (w bufferWriter) WritePage(page Page) (int64, error) {
	return CopyValues(w.buf.columns[page.Column()], page.Values())
}
// Compile-time interface satisfaction checks.
var (
	_ RowGroup       = (*Buffer)(nil)
	_ RowGroupWriter = (*Buffer)(nil)
	_ sort.Interface = (*Buffer)(nil)

	_ RowWriter   = (*bufferWriter)(nil)
	_ PageWriter  = (*bufferWriter)(nil)
	_ ValueWriter = (*bufferWriter)(nil)
)
// buffer is a reference-counted byte buffer which may be recycled through a
// bufferPool once its reference count drops to zero.
type buffer struct {
	data  []byte      // buffer content; len is the requested size, cap the bucket size
	refc  uintptr     // atomic reference count
	pool  *bufferPool // pool to return the buffer to on release, may be nil
	stack []byte      // allocation stack trace, only captured when debug.TRACEBUF > 0
}
// refCount returns the current reference count of the buffer.
func (b *buffer) refCount() int {
	return int(atomic.LoadUintptr(&b.refc))
}

// ref atomically increments the reference count.
func (b *buffer) ref() {
	atomic.AddUintptr(&b.refc, +1)
}

// unref atomically decrements the reference count; when it reaches zero the
// buffer is returned to its pool (if it has one). Adding ^uintptr(0) is the
// two's-complement equivalent of subtracting 1.
func (b *buffer) unref() {
	if atomic.AddUintptr(&b.refc, ^uintptr(0)) == 0 {
		if b.pool != nil {
			b.pool.put(b)
		}
	}
}
// monitorBufferRelease is installed as a finalizer on buffers when TRACEBUF
// debugging is enabled; it reports buffers collected while still referenced.
func monitorBufferRelease(b *buffer) {
	rc := b.refCount()
	if rc == 0 {
		return
	}
	log.Printf("PARQUETGODEBUG: buffer garbage collected with non-zero reference count\n%s", string(b.stack))
}
// bufferPool is a levelled pool of reference-counted buffers.
type bufferPool struct {
	// Buckets are split in two groups for short and large buffers. In the short
	// buffer group (below 256KB), the growth rate between each bucket is 2. The
	// growth rate changes to 1.5 in the larger buffer group.
	//
	// Short buffer buckets:
	// ---------------------
	//   4K, 8K, 16K, 32K, 64K, 128K, 256K
	//
	// Large buffer buckets:
	// ---------------------
	//   384K, 576K, 864K ...
	//
	// (See bufferPoolNextSize: 256K * 1.5 = 384K, and so on.)
	buckets [bufferPoolBucketCount]sync.Pool
}
// newBuffer allocates a fresh buffer of length bufferSize with capacity
// bucketSize, owned by p and starting with a reference count of one. When
// TRACEBUF debugging is enabled, a finalizer records leaked references.
func (p *bufferPool) newBuffer(bufferSize, bucketSize int) *buffer {
	buf := &buffer{
		data: make([]byte, bufferSize, bucketSize),
		refc: 1,
		pool: p,
	}
	if debug.TRACEBUF > 0 {
		buf.stack = make([]byte, 4096)
		runtime.SetFinalizer(buf, monitorBufferRelease)
	}
	return buf
}
// get returns a buffer from the levelled buffer pool. size is used to choose
// the appropriate pool.
func (p *bufferPool) get(bufferSize int) *buffer {
	bucketIndex, bucketSize := bufferPoolBucketIndexAndSizeOfGet(bufferSize)

	b := (*buffer)(nil)
	// bucketIndex is -1 when the requested size exceeds the largest bucket;
	// such buffers are allocated directly and never pooled.
	if bucketIndex >= 0 {
		b, _ = p.buckets[bucketIndex].Get().(*buffer)
	}

	if b == nil {
		// newBuffer returns a buffer with refc already set to 1.
		b = p.newBuffer(bufferSize, bucketSize)
	} else {
		// Recycled buffers keep their capacity; re-slice to the requested
		// length and take a new reference (refc was 0 when pooled).
		b.data = b.data[:bufferSize]
		b.ref()
	}

	if debug.TRACEBUF > 0 {
		b.stack = b.stack[:runtime.Stack(b.stack[:cap(b.stack)], false)]
	}
	return b
}
// put returns a released buffer to the pool. It panics on misuse: returning
// a buffer to a foreign pool, or returning one that is still referenced.
func (p *bufferPool) put(b *buffer) {
	if b.pool != p {
		panic("BUG: buffer returned to a different pool than the one it was allocated from")
	}
	if b.refCount() != 0 {
		panic("BUG: buffer returned to pool with a non-zero reference count")
	}
	// Oversized buffers (bucketIndex < 0) are simply dropped for the GC.
	bucketIndex, _ := bufferPoolBucketIndexAndSizeOfPut(cap(b.data))
	if bucketIndex >= 0 {
		p.buckets[bucketIndex].Put(b)
	}
}
const (
	// bufferPoolBucketCount is the number of size classes in a bufferPool.
	bufferPoolBucketCount = 32
	// bufferPoolMinSize is the size of the smallest bucket (4KiB).
	bufferPoolMinSize = 4096
	// bufferPoolLastShortBucketSize (256KiB) is the boundary where bucket
	// growth switches from x2 to x1.5; see bufferPoolNextSize.
	bufferPoolLastShortBucketSize = 262144
)
// bufferPoolNextSize returns the size of the bucket following one of the
// given size: doubling below the short-bucket boundary, growing by 1.5x
// at and above it.
func bufferPoolNextSize(size int) int {
	if size >= bufferPoolLastShortBucketSize {
		return size + (size / 2)
	}
	return size * 2
}
// bufferPoolBucketIndexAndSizeOfGet returns the index and capacity of the
// smallest bucket able to hold size bytes, or (-1, size) when size exceeds
// the largest bucket.
func bufferPoolBucketIndexAndSizeOfGet(size int) (int, int) {
	bucketSize := bufferPoolMinSize
	for bucketIndex := 0; bucketIndex < bufferPoolBucketCount; bucketIndex++ {
		if size <= bucketSize {
			return bucketIndex, bucketSize
		}
		bucketSize = bufferPoolNextSize(bucketSize)
	}
	return -1, size
}
// bufferPoolBucketIndexAndSizeOfPut returns the index and size of the bucket
// a buffer of the given capacity should be returned to, or (-1, size) when
// the capacity is below the smallest bucket or above the largest one.
func bufferPoolBucketIndexAndSizeOfPut(size int) (int, int) {
	// When releasing buffers, some may have a capacity that is not one of the
	// bucket sizes (due to the use of append for example). In this case, we
	// have to put the buffer in the highest bucket with a size less or equal
	// to the buffer capacity.
	if limit := bufferPoolMinSize; size >= limit {
		for i := 0; i < bufferPoolBucketCount; i++ {
			n := bufferPoolNextSize(limit)
			if size < n {
				return i, limit
			}
			limit = n
		}
	}
	return -1, size
}
// buffers is the package-level buffer pool shared by page readers.
var (
	buffers bufferPool
)
// bufferedPage wraps a Page together with the reference-counted buffers
// backing its values, offsets, and repetition/definition levels, so the
// page's lifetime can be managed with Retain/Release.
type bufferedPage struct {
	Page
	values           *buffer
	offsets          *buffer
	repetitionLevels *buffer
	definitionLevels *buffer
}
// newBufferedPage wraps page with its backing buffers, taking a reference on
// each non-nil buffer on behalf of the returned page.
func newBufferedPage(page Page, values, offsets, definitionLevels, repetitionLevels *buffer) *bufferedPage {
	bufferRef(values)
	bufferRef(offsets)
	bufferRef(definitionLevels)
	bufferRef(repetitionLevels)
	return &bufferedPage{
		Page:             page,
		values:           values,
		offsets:          offsets,
		definitionLevels: definitionLevels,
		repetitionLevels: repetitionLevels,
	}
}
// Slice returns a view of rows [i:j) of the page; the view shares the same
// backing buffers and takes its own references on them.
func (p *bufferedPage) Slice(i, j int64) Page {
	slice := p.Page.Slice(i, j)
	return newBufferedPage(slice, p.values, p.offsets, p.definitionLevels, p.repetitionLevels)
}
// Retain increments the reference count of every buffer backing the page.
func (p *bufferedPage) Retain() {
	for _, b := range [...]*buffer{p.values, p.offsets, p.definitionLevels, p.repetitionLevels} {
		bufferRef(b)
	}
}
// Release decrements the reference count of every buffer backing the page,
// returning them to their pool when no references remain.
func (p *bufferedPage) Release() {
	for _, b := range [...]*buffer{p.values, p.offsets, p.definitionLevels, p.repetitionLevels} {
		bufferUnref(b)
	}
}
// bufferRef increments the reference count of buf; it is a no-op on nil.
func bufferRef(buf *buffer) {
	if buf != nil {
		buf.ref()
	}
}

// bufferUnref decrements the reference count of buf; it is a no-op on nil.
func bufferUnref(buf *buffer) {
	if buf != nil {
		buf.unref()
	}
}
// Retain is a helper function to increment the reference counter of pages
// backed by memory which can be granularly managed by the application.
//
// Usage of this function is optional and with Release, is intended to allow
// finer grain memory management in the application. Most programs should be
// able to rely on automated memory management provided by the Go garbage
// collector instead.
//
// The function should be called when a page lifetime is about to be shared
// between multiple goroutines or layers of an application, and the program
// wants to express "sharing ownership" of the page.
//
// Calling this function on pages that do not embed a reference counter does
// nothing.
func Retain(page Page) {
	// The nil check (rather than the comma-ok form) also skips typed-nil
	// pages stored in the interface.
	p, _ := page.(retainable)
	if p != nil {
		p.Retain()
	}
}
// Release is a helper function to decrement the reference counter of pages
// backed by memory which can be granularly managed by the application.
//
// Usage of this is optional and with Retain, is intended to allow finer grained
// memory management in the application, at the expense of potentially causing
// panics if the page is used after its reference count has reached zero. Most
// programs should be able to rely on automated memory management provided by
// the Go garbage collector instead.
//
// The function should be called to return a page to the internal buffer pool,
// when a goroutine "releases ownership" it acquired either by being the single
// owner (e.g. capturing the return value from a ReadPage call) or having gotten
// shared ownership by calling Retain.
//
// Calling this function on pages that do not embed a reference counter does
// nothing.
func Release(page Page) {
	// The nil check (rather than the comma-ok form) also skips typed-nil
	// pages stored in the interface.
	p, _ := page.(releasable)
	if p != nil {
		p.Release()
	}
}
// retainable is implemented by pages whose backing memory is reference
// counted and can take additional references.
type retainable interface {
	Retain()
}

// releasable is implemented by pages whose backing memory is reference
// counted and can release references.
type releasable interface {
	Release()
}

// Compile-time checks that bufferedPage supports both operations.
var (
	_ retainable = (*bufferedPage)(nil)
	_ releasable = (*bufferedPage)(nil)
)
================================================
FILE: buffer_go18.go
================================================
//go:build go1.18
package parquet
import (
"reflect"
"sort"
)
// GenericBuffer is similar to a Buffer but uses a type parameter to define the
// Go type representing the schema of rows in the buffer.
//
// See GenericWriter for details about the benefits over the classic Buffer API.
type GenericBuffer[T any] struct {
	base  Buffer        // underlying untyped buffer doing the actual work
	write bufferFunc[T] // specialized write path selected from the kind of T
}
// NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write
// rows of Go type T.
//
// The type parameter T should be a map, struct, or any. Any other types will
// cause a panic at runtime. Type checking is a lot more effective when the
// generic parameter is a struct type, using map and interface types is somewhat
// similar to using a Writer. If using an interface type for the type parameter,
// then providing a schema at instantiation is required.
//
// If the option list may explicitly declare a schema, it must be compatible
// with the schema generated from T.
func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] {
	config, err := NewRowGroupConfig(options...)
	if err != nil {
		panic(err)
	}

	t := typeOf[T]()
	// Derive the schema from T when none was provided; t is nil when T is an
	// interface type, in which case an explicit schema is mandatory.
	if config.Schema == nil && t != nil {
		config.Schema = schemaOf(dereference(t))
	}
	if config.Schema == nil {
		panic("generic buffer must be instantiated with schema or concrete type.")
	}

	buf := &GenericBuffer[T]{base: Buffer{config: config}}
	buf.base.configure(config.Schema)
	buf.write = bufferFuncOf[T](t, config.Schema)
	return buf
}
// typeOf returns the reflect.Type of T, or nil when T is an interface type
// (reflect.TypeOf reports nil for a nil interface value).
func typeOf[T any]() reflect.Type {
	var zero T
	return reflect.TypeOf(zero)
}
// bufferFunc is the signature of the specialized write path of a GenericBuffer.
type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error)
// bufferFuncOf selects the write implementation for rows of type t: structs
// (and pointers to structs) get a compiled fast path, while interface and map
// types fall back to the generic row-deconstruction path. Any other kind
// panics.
func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
	if t == nil {
		return (*GenericBuffer[T]).writeRows
	}
	switch t.Kind() {
	case reflect.Struct:
		return makeBufferFunc[T](t, schema)
	case reflect.Interface, reflect.Map:
		return (*GenericBuffer[T]).writeRows
	case reflect.Pointer:
		if t.Elem().Kind() == reflect.Struct {
			return makeBufferFunc[T](t, schema)
		}
	}
	panic("cannot create buffer for values of type " + t.String())
}
// makeBufferFunc compiles a write function that pushes rows of type t
// directly into the column buffers, bypassing row deconstruction.
func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
	writeRows := writeRowsFuncOf(t, schema, nil)
	return func(buf *GenericBuffer[T], rows []T) (int, error) {
		if err := writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{}); err != nil {
			return 0, err
		}
		return len(rows), nil
	}
}
// Size returns the estimated size of the buffer in memory (in bytes).
func (buf *GenericBuffer[T]) Size() int64 {
	return buf.base.Size()
}

// NumRows returns the number of rows written to the buffer.
func (buf *GenericBuffer[T]) NumRows() int64 {
	return buf.base.NumRows()
}

// ColumnChunks returns the buffer columns as read-only chunks.
func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk {
	return buf.base.ColumnChunks()
}

// ColumnBuffers returns the buffer columns as writable column buffers.
func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer {
	return buf.base.ColumnBuffers()
}

// SortingColumns returns the list of columns by which the buffer will be
// sorted.
func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn {
	return buf.base.SortingColumns()
}

// Len returns the number of rows in the buffer (sort.Interface).
func (buf *GenericBuffer[T]) Len() int {
	return buf.base.Len()
}

// Less reports whether row i sorts before row j (sort.Interface).
func (buf *GenericBuffer[T]) Less(i, j int) bool {
	return buf.base.Less(i, j)
}

// Swap exchanges the rows at indexes i and j (sort.Interface).
func (buf *GenericBuffer[T]) Swap(i, j int) {
	buf.base.Swap(i, j)
}

// Reset clears the content of the buffer, allowing it to be reused.
func (buf *GenericBuffer[T]) Reset() {
	buf.base.Reset()
}

// Write writes rows of type T to the buffer, returning the number of rows
// written.
func (buf *GenericBuffer[T]) Write(rows []T) (int, error) {
	if len(rows) == 0 {
		return 0, nil
	}
	return buf.write(buf, rows)
}

// WriteRows writes pre-deconstructed parquet rows to the buffer.
func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) {
	return buf.base.WriteRows(rows)
}

// WriteRowGroup appends all rows of rowGroup to the buffer.
func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) {
	return buf.base.WriteRowGroup(rowGroup)
}

// Rows returns a reader exposing the current content of the buffer.
func (buf *GenericBuffer[T]) Rows() Rows {
	return buf.base.Rows()
}

// Schema returns the schema of the buffer.
func (buf *GenericBuffer[T]) Schema() *Schema {
	return buf.base.Schema()
}
// writeRows is the fallback write path used for map and interface row types:
// each row is deconstructed through the schema before being buffered.
func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) {
	if cap(buf.base.rowbuf) < len(rows) {
		buf.base.rowbuf = make([]Row, len(rows))
	} else {
		buf.base.rowbuf = buf.base.rowbuf[:len(rows)]
	}
	defer clearRows(buf.base.rowbuf)

	schema := buf.base.Schema()
	for i := range rows {
		buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i])
	}
	return buf.base.WriteRows(buf.base.rowbuf)
}
// Compile-time checks that GenericBuffer satisfies the row group interfaces
// for interface, struct, and map type parameters.
var (
	_ RowGroup       = (*GenericBuffer[any])(nil)
	_ RowGroupWriter = (*GenericBuffer[any])(nil)
	_ sort.Interface = (*GenericBuffer[any])(nil)

	_ RowGroup       = (*GenericBuffer[struct{}])(nil)
	_ RowGroupWriter = (*GenericBuffer[struct{}])(nil)
	_ sort.Interface = (*GenericBuffer[struct{}])(nil)

	_ RowGroup       = (*GenericBuffer[map[struct{}]struct{}])(nil)
	_ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil)
	_ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil)
)
================================================
FILE: buffer_go18_test.go
================================================
//go:build go1.18
package parquet_test
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math/rand"
"reflect"
"sort"
"testing"
"github.com/segmentio/parquet-go"
)
// TestGenericBuffer runs the generic buffer round-trip property test against
// every supported row type, covering all physical types, logical types,
// nesting shapes, and pointer rows.
func TestGenericBuffer(t *testing.T) {
	testGenericBuffer[booleanColumn](t)
	testGenericBuffer[int32Column](t)
	testGenericBuffer[int64Column](t)
	testGenericBuffer[int96Column](t)
	testGenericBuffer[floatColumn](t)
	testGenericBuffer[doubleColumn](t)
	testGenericBuffer[byteArrayColumn](t)
	testGenericBuffer[fixedLenByteArrayColumn](t)
	testGenericBuffer[stringColumn](t)
	testGenericBuffer[indexedStringColumn](t)
	testGenericBuffer[uuidColumn](t)
	testGenericBuffer[timeColumn](t)
	testGenericBuffer[timeInMillisColumn](t)
	testGenericBuffer[mapColumn](t)
	testGenericBuffer[decimalColumn](t)
	testGenericBuffer[addressBook](t)
	testGenericBuffer[contact](t)
	testGenericBuffer[listColumn2](t)
	testGenericBuffer[listColumn1](t)
	testGenericBuffer[listColumn0](t)
	testGenericBuffer[nestedListColumn1](t)
	testGenericBuffer[nestedListColumn](t)
	testGenericBuffer[*contact](t)
	testGenericBuffer[paddedBooleanColumn](t)
	testGenericBuffer[optionalInt32Column](t)
	testGenericBuffer[repeatedInt32Column](t)
}
func testGenericBuffer[Row any](t *testing.T) {
var model Row
t.Run(reflect.TypeOf(model).Name(), func(t *testing.T) {
err := quickCheck(func(rows []Row) bool {
if len(rows) == 0 {
return true // TODO: fix support for parquet files with zero rows
}
if err := testGenericBufferRows(rows); err != nil {
t.Error(err)
return false
}
return true
})
if err != nil {
t.Error(err)
}
})
}
// testGenericBufferRows writes rows into a GenericBuffer, reads them back,
// and reports an error unless the result matches the input exactly.
func testGenericBufferRows[Row any](rows []Row) error {
	setNullPointers(rows)

	buffer := parquet.NewGenericBuffer[Row]()
	if _, err := buffer.Write(rows); err != nil {
		return err
	}

	reader := parquet.NewGenericRowGroupReader[Row](buffer)
	result := make([]Row, len(rows))
	n, err := reader.Read(result)
	switch {
	case err != nil && !errors.Is(err, io.EOF):
		return err
	case n < len(rows):
		return fmt.Errorf("not enough values were read: want=%d got=%d", len(rows), n)
	case !reflect.DeepEqual(rows, result):
		return fmt.Errorf("rows mismatch:\nwant: %#v\ngot: %#v", rows, result)
	}
	return nil
}
// setNullPointers replaces nil elements of a slice of pointer rows with
// freshly allocated zero values; it is a no-op for non-pointer row types.
func setNullPointers[Row any](rows []Row) {
	if len(rows) == 0 || reflect.TypeOf(rows[0]).Kind() != reflect.Pointer {
		return
	}
	for i := range rows {
		ptr := reflect.ValueOf(&rows[i]).Elem()
		if ptr.IsNil() {
			ptr.Set(reflect.New(ptr.Type().Elem()))
		}
	}
}
// generator is implemented by benchmark row types able to produce random
// instances of themselves.
type generator[T any] interface {
	generate(*rand.Rand) T
}
// BenchmarkGenericBuffer benchmarks buffer writes for every supported row
// type, comparing the generic and pre-generics write paths.
func BenchmarkGenericBuffer(b *testing.B) {
	benchmarkGenericBuffer[benchmarkRowType](b)
	benchmarkGenericBuffer[booleanColumn](b)
	benchmarkGenericBuffer[int32Column](b)
	benchmarkGenericBuffer[int64Column](b)
	benchmarkGenericBuffer[floatColumn](b)
	benchmarkGenericBuffer[doubleColumn](b)
	benchmarkGenericBuffer[byteArrayColumn](b)
	benchmarkGenericBuffer[fixedLenByteArrayColumn](b)
	benchmarkGenericBuffer[stringColumn](b)
	benchmarkGenericBuffer[indexedStringColumn](b)
	benchmarkGenericBuffer[uuidColumn](b)
	benchmarkGenericBuffer[timeColumn](b)
	benchmarkGenericBuffer[timeInMillisColumn](b)
	benchmarkGenericBuffer[mapColumn](b)
	benchmarkGenericBuffer[decimalColumn](b)
	benchmarkGenericBuffer[contact](b)
	benchmarkGenericBuffer[paddedBooleanColumn](b)
	benchmarkGenericBuffer[optionalInt32Column](b)
	benchmarkGenericBuffer[repeatedInt32Column](b)
}
// benchmarkGenericBuffer measures rows/second for writing generated rows of
// type Row, once through the reflection-based Buffer API ("go1.17") and once
// through the generic GenericBuffer API ("go1.18").
func benchmarkGenericBuffer[Row generator[Row]](b *testing.B) {
	var model Row
	b.Run(reflect.TypeOf(model).Name(), func(b *testing.B) {
		prng := rand.New(rand.NewSource(0))
		rows := make([]Row, benchmarkNumRows)
		for i := range rows {
			rows[i] = rows[i].generate(prng)
		}

		b.Run("go1.17", func(b *testing.B) {
			buffer := parquet.NewBuffer(parquet.SchemaOf(rows[0]))
			i := 0
			benchmarkRowsPerSecond(b, func() int {
				for j := 0; j < benchmarkRowsPerStep; j++ {
					if err := buffer.Write(&rows[i]); err != nil {
						b.Fatal(err)
					}
				}

				// Cycle through the generated rows, resetting the buffer
				// each time the full set has been written.
				i += benchmarkRowsPerStep
				i %= benchmarkNumRows
				if i == 0 {
					buffer.Reset()
				}
				return benchmarkRowsPerStep
			})
		})

		b.Run("go1.18", func(b *testing.B) {
			buffer := parquet.NewGenericBuffer[Row]()
			i := 0
			benchmarkRowsPerSecond(b, func() int {
				n, err := buffer.Write(rows[i : i+benchmarkRowsPerStep])
				if err != nil {
					b.Fatal(err)
				}

				// Same cycling strategy as above, but writing whole slices.
				i += benchmarkRowsPerStep
				i %= benchmarkNumRows
				if i == 0 {
					buffer.Reset()
				}
				return n
			})
		})
	})
}
// TestIssue327 verifies that instantiating a generic buffer over a struct
// with untagged nested lists panics instead of producing a broken schema.
func TestIssue327(t *testing.T) {
	t.Run("untagged nested lists should panic", func(t *testing.T) {
		type testType struct {
			ListOfLists [][]int
		}

		defer func() {
			if recover() == nil {
				t.Errorf("Nested lists without the list tag should panic")
			}
		}()

		_ = parquet.NewGenericBuffer[testType]()
	})
}
// TestIssue346 verifies that writing concrete values through a
// GenericBuffer[any] instantiated with an explicit schema does not panic.
func TestIssue346(t *testing.T) {
	type TestType struct {
		Key int
	}

	schema := parquet.SchemaOf(TestType{})
	buffer := parquet.NewGenericBuffer[any](schema)
	rows := []any{TestType{Key: 0}}
	_, _ = buffer.Write(rows)
}
// TestIssue347 verifies the instantiation rules of GenericBuffer: concrete
// type parameters and interface parameters with an explicit schema are fine,
// while an interface parameter without a schema must panic.
func TestIssue347(t *testing.T) {
	type TestType struct {
		Key int
	}

	// instantiating with concrete type shouldn't panic
	_ = parquet.NewGenericBuffer[TestType]()

	// instantiating with schema and interface type parameter shouldn't panic
	schema := parquet.SchemaOf(TestType{})
	_ = parquet.NewGenericBuffer[any](schema)

	defer func() {
		if recover() == nil {
			t.Errorf("instantiating generic buffer without schema and with interface " +
				"type parameter should panic")
		}
	}()
	_ = parquet.NewGenericBuffer[any]()
}
// BenchmarkSortGenericBuffer measures the cost of re-sorting a GenericBuffer
// of 10K rows by a 16-byte ID column after perturbing a handful of rows.
//
// Fix: the error returned by buf.Write was previously ignored; a failed
// write would have silently benchmarked sorting an empty buffer.
func BenchmarkSortGenericBuffer(b *testing.B) {
	type Row struct {
		I0 int64
		I1 int64
		I2 int64
		I3 int64
		I4 int64
		I5 int64
		I6 int64
		I7 int64
		I8 int64
		I9 int64
		ID [16]byte
	}

	buf := parquet.NewGenericBuffer[Row](
		parquet.SortingRowGroupConfig(
			parquet.SortingColumns(
				parquet.Ascending("ID"),
			),
		),
	)

	// IDs are deterministic: ascending in the first 8 bytes, complemented in
	// the last 8, so the initial buffer is written in sorted order.
	rows := make([]Row, 10e3)
	prng := rand.New(rand.NewSource(0))
	for i := range rows {
		binary.LittleEndian.PutUint64(rows[i].ID[:8], uint64(i))
		binary.LittleEndian.PutUint64(rows[i].ID[8:], ^uint64(i))
	}

	if _, err := buf.Write(rows); err != nil {
		b.Fatal(err)
	}
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		// Swap a few random rows so each iteration has real sorting work.
		for j := 0; j < 10; j++ {
			buf.Swap(prng.Intn(len(rows)), prng.Intn(len(rows)))
		}
		sort.Sort(buf)
	}
}
================================================
FILE: buffer_internal_test.go
================================================
package parquet
import (
"fmt"
"math/rand"
"testing"
)
// TestBufferAlwaysCorrectSize checks that bufferPool.get always hands back a
// buffer whose length is exactly the requested size, for many random sizes.
func TestBufferAlwaysCorrectSize(t *testing.T) {
	var pool bufferPool
	for attempt := 0; attempt < 1000; attempt++ {
		size := rand.Intn(1024 * 1024)
		buf := pool.get(size)
		if got := len(buf.data); got != size {
			t.Fatalf("Expected buffer of size %d, got %d", size, got)
		}
		buf.unref()
	}
}
// TestBufferPoolBucketIndexAndSizeOf checks the mapping from a requested
// buffer size to the pool bucket serving it and that bucket's capacity,
// including the boundaries between buckets.
func TestBufferPoolBucketIndexAndSizeOf(t *testing.T) {
	for _, tc := range []struct {
		size        int
		bucketIndex int
		bucketSize  int
	}{
		{size: 0, bucketIndex: 0, bucketSize: 4096},
		{size: 1, bucketIndex: 0, bucketSize: 4096},
		{size: 2049, bucketIndex: 0, bucketSize: 4096},
		{size: 4096, bucketIndex: 0, bucketSize: 4096},
		{size: 4097, bucketIndex: 1, bucketSize: 8192},
		{size: 8192, bucketIndex: 1, bucketSize: 8192},
		{size: 8193, bucketIndex: 2, bucketSize: 16384},
		{size: 16384, bucketIndex: 2, bucketSize: 16384},
		{size: 16385, bucketIndex: 3, bucketSize: 32768},
		{size: 32768, bucketIndex: 3, bucketSize: 32768},
		{size: 32769, bucketIndex: 4, bucketSize: 65536},
		{size: 262143, bucketIndex: 6, bucketSize: 262144},
		{size: 262144, bucketIndex: 6, bucketSize: 262144},
		{size: 262145, bucketIndex: 7, bucketSize: 393216},
	} {
		tc := tc
		t.Run(fmt.Sprintf("size=%d", tc.size), func(t *testing.T) {
			gotIndex, gotSize := bufferPoolBucketIndexAndSizeOfGet(tc.size)
			if gotIndex != tc.bucketIndex {
				t.Errorf("wrong bucket index, want %d but got %d", tc.bucketIndex, gotIndex)
			}
			if gotSize != tc.bucketSize {
				t.Errorf("wrong bucket size, want %d but got %d", tc.bucketSize, gotSize)
			}
		})
	}
}
================================================
FILE: buffer_pool.go
================================================
package parquet
import (
"fmt"
"io"
"os"
"path/filepath"
"sync"
)
// BufferPool is an interface abstracting the underlying implementation of
// page buffer pools.
//
// The parquet-go package provides two implementations of this interface, one
// backed by in-memory buffers (on the Go heap), and the other using temporary
// files on disk.
//
// Applications which need finer grain control over the allocation and retention
// of page buffers may choose to provide their own implementation and install it
// via the parquet.ColumnPageBuffers writer option.
//
// BufferPool implementations must be safe to use concurrently from multiple
// goroutines.
type BufferPool interface {
	// GetBuffer is called when a parquet writer needs to acquire a new
	// page buffer from the pool.
	//
	// Implementations in this package never return nil; failures are
	// reported lazily by returning a buffer whose operations error out.
	GetBuffer() io.ReadWriteSeeker

	// PutBuffer is called when a parquet writer releases a page buffer to
	// the pool.
	//
	// The parquet.Writer type guarantees that the buffers it calls this method
	// with were previously acquired by a call to GetBuffer on the same
	// pool, and that it will not use them anymore after the call.
	PutBuffer(io.ReadWriteSeeker)
}
// NewBufferPool creates a new in-memory page buffer pool.
//
// The implementation is backed by sync.Pool and allocates memory buffers on
// the Go heap; the zero value of memoryBufferPool is ready to use.
func NewBufferPool() BufferPool { return &memoryBufferPool{} }
type memoryBuffer struct {
data []byte
off int
}
func (p *memoryBuffer) Reset() {
p.data, p.off = p.data[:0], 0
}
func (p *memoryBuffer) Read(b []byte) (n int, err error) {
n = copy(b, p.data[p.off:])
p.off += n
if p.off == len(p.data) {
err = io.EOF
}
return n, err
}
func (p *memoryBuffer) Write(b []byte) (int, error) {
n := copy(p.data[p.off:cap(p.data)], b)
p.data = p.data[:p.off+n]
if n < len(b) {
p.data = append(p.data, b[n:]...)
}
p.off += len(b)
return len(b), nil
}
func (p *memoryBuffer) WriteTo(w io.Writer) (int64, error) {
n, err := w.Write(p.data[p.off:])
p.off += n
return int64(n), err
}
func (p *memoryBuffer) Seek(offset int64, whence int) (int64, error) {
switch whence {
case io.SeekCurrent:
offset += int64(p.off)
case io.SeekEnd:
offset += int64(len(p.data))
}
if offset < 0 {
return 0, fmt.Errorf("seek: negative offset: %d<0", offset)
}
if offset > int64(len(p.data)) {
offset = int64(len(p.data))
}
p.off = int(offset)
return offset, nil
}
// memoryBufferPool is the in-memory BufferPool implementation, recycling
// memoryBuffer values through an embedded sync.Pool.
type memoryBufferPool struct{ sync.Pool }

// GetBuffer returns a reset buffer from the pool, or a fresh one when the
// pool is empty.
func (pool *memoryBufferPool) GetBuffer() io.ReadWriteSeeker {
	if b, ok := pool.Get().(*memoryBuffer); ok && b != nil {
		b.Reset()
		return b
	}
	return new(memoryBuffer)
}

// PutBuffer recycles buffers previously returned by GetBuffer; values of any
// other type are dropped.
func (pool *memoryBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
	if b, ok := buf.(*memoryBuffer); ok && b != nil {
		pool.Put(b)
	}
}
// fileBufferPool is the on-disk BufferPool implementation, backing each
// buffer with a temporary file.
type fileBufferPool struct {
	err     error  // deferred construction error, reported on first GetBuffer
	tempdir string // absolute directory where temporary files are created
	pattern string // file name pattern passed to os.CreateTemp
}

// NewFileBufferPool creates a new on-disk page buffer pool.
func NewFileBufferPool(tempdir, pattern string) BufferPool {
	// Resolve the directory eagerly; a failure here is surfaced lazily by
	// GetBuffer returning an erroring buffer.
	abs, err := filepath.Abs(tempdir)
	return &fileBufferPool{
		err:     err,
		tempdir: abs,
		pattern: pattern,
	}
}
// GetBuffer creates a new temporary file to back the page buffer. Failures
// (either from pool construction or file creation) are returned as an
// errorBuffer so they surface on first use instead of here.
func (pool *fileBufferPool) GetBuffer() io.ReadWriteSeeker {
	var (
		f   *os.File
		err = pool.err
	)
	if err == nil {
		f, err = os.CreateTemp(pool.tempdir, pool.pattern)
	}
	if err != nil {
		return &errorBuffer{err: err}
	}
	return f
}
// PutBuffer closes and deletes the temporary file backing a buffer returned
// by GetBuffer. Buffers of other types (e.g. errorBuffer) are simply dropped.
//
// Fix: the file is now closed before being removed. The previous code removed
// the file while it was still open (Close was deferred), which fails on
// platforms such as Windows that do not allow deleting open files.
func (pool *fileBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
	if f, _ := buf.(*os.File); f != nil {
		name := f.Name()
		f.Close()
		os.Remove(name)
	}
}
// errorBuffer is an io.ReadWriteSeeker which fails every operation with a
// fixed error. Buffer pools hand it out when they cannot produce a real
// buffer, so the failure surfaces on first use rather than at acquisition.
type errorBuffer struct{ err error }

func (buf *errorBuffer) Read([]byte) (int, error)          { return 0, buf.err }
func (buf *errorBuffer) Write([]byte) (int, error)         { return 0, buf.err }
func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0, buf.err }
func (buf *errorBuffer) WriteTo(io.Writer) (int64, error)  { return 0, buf.err }
func (buf *errorBuffer) Seek(int64, int) (int64, error)    { return 0, buf.err }
var (
	// Package-level in-memory pools used as defaults.
	// NOTE(review): the consumers live outside this file — presumably the
	// writer configuration; confirm before relying on these names.
	defaultColumnBufferPool  memoryBufferPool
	defaultSortingBufferPool memoryBufferPool

	// Compile-time checks that the buffer types provide the io.Copy fast
	// paths the rest of the package expects.
	_ io.ReaderFrom = (*errorBuffer)(nil)
	_ io.WriterTo   = (*errorBuffer)(nil)
	_ io.WriterTo   = (*memoryBuffer)(nil)
)
type readerAt struct {
reader io.ReadSeeker
offset int64
}
func (r *readerAt) ReadAt(b []byte, off int64) (int, error) {
if r.offset < 0 || off != r.offset {
off, err := r.reader.Seek(off, io.SeekStart)
if err != nil {
return 0, err
}
r.offset = off
}
n, err := r.reader.Read(b)
r.offset += int64(n)
return n, err
}
func newReaderAt(r io.ReadSeeker) io.ReaderAt {
if rr, ok := r.(io.ReaderAt); ok {
return rr
}
return &readerAt{reader: r, offset: -1}
}
================================================
FILE: buffer_pool_test.go
================================================
package parquet_test
import (
"bytes"
"io"
"strings"
"testing"
"testing/iotest"
"github.com/segmentio/parquet-go"
)
// TestBufferPool runs the shared buffer pool suite against the in-memory
// implementation.
func TestBufferPool(t *testing.T) {
	testBufferPool(t, parquet.NewBufferPool())
}

// TestFileBufferPool runs the shared buffer pool suite against the on-disk
// implementation, creating temporary files under /tmp.
func TestFileBufferPool(t *testing.T) {
	testBufferPool(t, parquet.NewFileBufferPool("/tmp", "buffers.*"))
}
// testBufferPool runs every buffer pool scenario as a subtest against pool.
func testBufferPool(t *testing.T, pool parquet.BufferPool) {
	for _, test := range []struct {
		scenario string
		function func(*testing.T, parquet.BufferPool)
	}{
		{scenario: "write bytes", function: testBufferPoolWriteBytes},
		{scenario: "write string", function: testBufferPoolWriteString},
		{scenario: "copy to buffer", function: testBufferPoolCopyToBuffer},
		{scenario: "copy from buffer", function: testBufferPoolCopyFromBuffer},
	} {
		test := test
		t.Run(test.scenario, func(t *testing.T) { test.function(t, pool) })
	}
}
// testBufferPoolWriteBytes checks that bytes written through Write can be
// read back from the beginning of a pool buffer.
func testBufferPoolWriteBytes(t *testing.T, pool parquet.BufferPool) {
	const content = "Hello World!"

	buffer := pool.GetBuffer()
	defer pool.PutBuffer(buffer)

	if _, err := buffer.Write([]byte(content)); err != nil {
		t.Fatal(err)
	}
	assertBufferContent(t, buffer, content)
}
// testBufferPoolWriteString checks that content written via io.WriteString
// can be read back from the beginning of a pool buffer.
func testBufferPoolWriteString(t *testing.T, pool parquet.BufferPool) {
	const content = "Hello World!"

	buffer := pool.GetBuffer()
	defer pool.PutBuffer(buffer)

	if _, err := io.WriteString(buffer, content); err != nil {
		t.Fatal(err)
	}
	assertBufferContent(t, buffer, content)
}
// testBufferPoolCopyToBuffer verifies copying into a pool buffer through the
// generic io.Copy path: wrapping the reader in an anonymous struct hides any
// io.WriterTo/io.ReaderFrom implementations so io.Copy cannot take its fast
// paths.
func testBufferPoolCopyToBuffer(t *testing.T, pool parquet.BufferPool) {
	const content = "ABC"
	buffer := pool.GetBuffer()
	defer pool.PutBuffer(buffer)

	reader := strings.NewReader(content)

	_, err := io.Copy(buffer, struct{ io.Reader }{reader})
	if err != nil {
		t.Fatal(err)
	}
	assertBufferContent(t, buffer, content)
}
// testBufferPoolCopyFromBuffer verifies reading a pool buffer back through
// the generic io.Copy path: wrapping the writer in an anonymous struct hides
// any io.ReaderFrom/io.WriterTo implementations so io.Copy cannot take its
// fast paths.
func testBufferPoolCopyFromBuffer(t *testing.T, pool parquet.BufferPool) {
	const content = "0123456789"
	buffer := pool.GetBuffer()
	defer pool.PutBuffer(buffer)

	if _, err := io.WriteString(buffer, content); err != nil {
		t.Fatal(err)
	}
	// Rewind so the copy starts from the beginning of what was written.
	if _, err := buffer.Seek(0, io.SeekStart); err != nil {
		t.Fatal(err)
	}

	writer := new(bytes.Buffer)

	_, err := io.Copy(struct{ io.Writer }{writer}, buffer)
	if err != nil {
		t.Fatal(err)
	}
	assertBufferContent(t, bytes.NewReader(writer.Bytes()), content)
}
// assertBufferContent rewinds r and asserts, via iotest.TestReader, that its
// full contents equal want.
func assertBufferContent(t *testing.T, r io.ReadSeeker, want string) {
	t.Helper()

	pos, err := r.Seek(0, io.SeekStart)
	if err != nil {
		t.Error("seek:", err)
	}
	if pos != 0 {
		t.Errorf("seek: invalid offset returned: want=0 got=%d", pos)
	}

	if err := iotest.TestReader(r, []byte(want)); err != nil {
		t.Error("iotest:", err)
	}
}
================================================
FILE: buffer_test.go
================================================
package parquet_test
import (
"bytes"
"errors"
"io"
"math"
"math/rand"
"reflect"
"sort"
"strconv"
"testing"
"github.com/segmentio/parquet-go"
"github.com/segmentio/parquet-go/encoding"
)
// bufferTests lists the scenarios exercised by TestBuffer: for each physical
// parquet type, a series of value batches of increasing size (empty batch,
// single value, many values including boundary values of the type).
var bufferTests = [...]struct {
	scenario string
	typ      parquet.Type
	values   [][]interface{}
}{
	{
		scenario: "boolean",
		typ:      parquet.BooleanType,
		values: [][]interface{}{
			{},
			{false},
			{true},
			{
				false, true, false, false, true, true,
				false, false, false, true, false, true,
			},
		},
	},

	{
		scenario: "int32",
		typ:      parquet.Int32Type,
		values: [][]interface{}{
			{},
			{int32(0)},
			{int32(1)},
			{
				int32(1), int32(2), int32(3), int32(4), int32(5), int32(6),
				int32(math.MaxInt8), int32(math.MaxInt16), int32(math.MaxInt32),
				int32(7), int32(9), int32(9), int32(0),
			},
		},
	},

	{
		scenario: "int64",
		typ:      parquet.Int64Type,
		values: [][]interface{}{
			{},
			{int64(0)},
			{int64(1)},
			{
				int64(1), int64(2), int64(3), int64(4), int64(5), int64(6),
				int64(math.MaxInt8), int64(math.MaxInt16), int64(math.MaxInt64), int64(7),
				int64(9), int64(9), int64(0),
			},
		},
	},

	{
		scenario: "float",
		typ:      parquet.FloatType,
		values: [][]interface{}{
			{},
			{float32(0)},
			{float32(1)},
			{
				float32(1), float32(2), float32(3), float32(4), float32(5), float32(6),
				float32(0.5), float32(math.SmallestNonzeroFloat32), float32(math.MaxFloat32), float32(7),
				float32(9), float32(9), float32(0),
			},
		},
	},

	{
		scenario: "double",
		typ:      parquet.DoubleType,
		values: [][]interface{}{
			{},
			{float64(0)},
			{float64(1)},
			{
				float64(1), float64(2), float64(3), float64(4), float64(5), float64(6),
				float64(0.5), float64(math.SmallestNonzeroFloat64), float64(math.MaxFloat64), float64(7),
				float64(9), float64(9), float64(0),
			},
		},
	},

	{
		scenario: "string",
		typ:      parquet.ByteArrayType,
		values: [][]interface{}{
			{},
			{""},
			{"Hello World!"},
			{
				"ABCDEFG", "HIJKLMN", "OPQRSTU", "VWXZY01", "2345678",
				"90!@#$%", "^&*()_+", "Hello World!", "Answer=42", "ABCEDFG",
				"HIJKLMN", "OPQRSTU", "VWXYZ",
			},
		},
	},

	{
		scenario: "fixed length byte array",
		typ:      parquet.FixedLenByteArrayType(10),
		values: [][]interface{}{
			{},
			{[10]byte{}},
			{[10]byte{0: 1}},
			{
				[10]byte{0: 0}, [10]byte{0: 2}, [10]byte{0: 1}, [10]byte{0: 4}, [10]byte{0: 3},
				[10]byte{0: 6}, [10]byte{0: 5}, [10]byte{0: 8}, [10]byte{0: 7}, [10]byte{0: 10},
				[10]byte{0: 11}, [10]byte{0: 12}, [10]byte{9: 0xFF},
			},
		},
	},

	{
		scenario: "uuid",
		typ:      parquet.UUID().Type(),
		values: [][]interface{}{
			{},
			{[16]byte{}},
			{[16]byte{0: 1}},
			{
				[16]byte{0: 0}, [16]byte{0: 2}, [16]byte{0: 1}, [16]byte{0: 4}, [16]byte{0: 3},
				[16]byte{0: 6}, [16]byte{0: 5}, [16]byte{0: 8}, [16]byte{0: 7}, [16]byte{0: 10},
				[16]byte{0: 11}, [16]byte{0: 12}, [16]byte{15: 0xFF},
			},
		},
	},

	{
		scenario: "uint32",
		typ:      parquet.Uint(32).Type(),
		values: [][]interface{}{
			{},
			{uint32(0)},
			{uint32(1)},
			{
				uint32(1), uint32(2), uint32(3), uint32(4), uint32(5), uint32(6),
				uint32(math.MaxInt8), uint32(math.MaxInt16), uint32(math.MaxUint32), uint32(7),
				uint32(9), uint32(9), uint32(0),
			},
		},
	},

	{
		scenario: "uint64",
		typ:      parquet.Uint(64).Type(),
		values: [][]interface{}{
			{},
			{uint64(0)},
			{uint64(1)},
			{
				uint64(1), uint64(2), uint64(3), uint64(4), uint64(5), uint64(6),
				uint64(math.MaxInt8), uint64(math.MaxInt16), uint64(math.MaxUint64),
				uint64(7), uint64(9), uint64(9), uint64(0),
			},
		},
	},
}
// TestBuffer runs every bufferTests scenario through the cartesian product of
// (plain|indexed) encodings x (optional|repeated|required) repetition
// modifiers x (unordered|ascending|descending) sorting configurations.
func TestBuffer(t *testing.T) {
	for _, test := range bufferTests {
		t.Run(test.scenario, func(t *testing.T) {
			for _, config := range [...]struct {
				scenario string
				typ      parquet.Type
			}{
				{scenario: "plain", typ: test.typ},
				{scenario: "indexed", typ: test.typ.NewDictionary(0, 0, test.typ.NewValues(nil, nil)).Type()},
			} {
				t.Run(config.scenario, func(t *testing.T) {
					for _, mod := range [...]struct {
						scenario string
						function func(parquet.Node) parquet.Node
					}{
						{scenario: "optional", function: parquet.Optional},
						{scenario: "repeated", function: parquet.Repeated},
						{scenario: "required", function: parquet.Required},
					} {
						t.Run(mod.scenario, func(t *testing.T) {
							for _, ordering := range [...]struct {
								scenario string
								sorting  parquet.SortingColumn
								sortFunc func(parquet.Type, []parquet.Value)
							}{
								{scenario: "unordered", sorting: nil, sortFunc: unordered},
								{scenario: "ascending", sorting: parquet.Ascending("data"), sortFunc: ascending},
								{scenario: "descending", sorting: parquet.Descending("data"), sortFunc: descending},
							} {
								t.Run(ordering.scenario, func(t *testing.T) {
									// Single-column schema named "data" with the modifier
									// and type under test.
									schema := parquet.NewSchema("test", parquet.Group{
										"data": mod.function(parquet.Leaf(config.typ)),
									})

									options := []parquet.RowGroupOption{
										schema,
										parquet.ColumnBufferCapacity(100),
									}
									if ordering.sorting != nil {
										options = append(options,
											parquet.SortingRowGroupConfig(
												parquet.SortingColumns(ordering.sorting),
											),
										)
									}

									content := new(bytes.Buffer)
									buffer := parquet.NewBuffer(options...)

									// The buffer and scratch content are shared across the
									// value batches; the deferred resets restore them at
									// the end of each anonymous subtest.
									for _, values := range test.values {
										t.Run("", func(t *testing.T) {
											defer content.Reset()
											defer buffer.Reset()
											fields := schema.Fields()
											testBuffer(t, fields[0], buffer, &parquet.Plain, values, ordering.sortFunc)
										})
									}
								})
							}
						})
					}
				})
			}
		})
	}
}
// sortFunc orders a slice of values in place; implementations mirror the
// sorting configuration applied to the buffer under test.
type sortFunc func(parquet.Type, []parquet.Value)

// unordered leaves values untouched, matching a buffer with no sorting column.
func unordered(typ parquet.Type, values []parquet.Value) {}

// ascending sorts values in ascending order according to typ.Compare.
func ascending(typ parquet.Type, values []parquet.Value) {
	sort.Slice(values, func(i, j int) bool { return typ.Compare(values[i], values[j]) < 0 })
}

// descending sorts values in descending order according to typ.Compare.
func descending(typ parquet.Type, values []parquet.Value) {
	sort.Slice(values, func(i, j int) bool { return typ.Compare(values[i], values[j]) > 0 })
}
// testBuffer writes values as single-value rows into buffer, sorts the
// buffer, and verifies the resulting page: row/value/null counts, min/max
// bounds, and the values read back from the full page as well as from head
// and tail slices of it.
func testBuffer(t *testing.T, node parquet.Node, buffer *parquet.Buffer, encoding encoding.Encoding, values []interface{}, sortFunc sortFunc) {
	repetitionLevel := 0
	definitionLevel := 0
	// Non-required columns carry one definition level for present values.
	if !node.Required() {
		definitionLevel = 1
	}

	minValue := parquet.Value{}
	maxValue := parquet.Value{}
	batch := make([]parquet.Value, len(values))
	for i := range values {
		batch[i] = parquet.ValueOf(values[i]).Level(repetitionLevel, definitionLevel, 0)
	}

	for i := range batch {
		_, err := buffer.WriteRows([]parquet.Row{batch[i : i+1]})
		if err != nil {
			t.Fatalf("writing value to row group: %v", err)
		}
	}

	numRows := buffer.NumRows()
	if numRows != int64(len(batch)) {
		t.Fatalf("number of rows mismatch: want=%d got=%d", len(batch), numRows)
	}

	// Compute the expected bounds of the page from the written batch.
	typ := node.Type()
	for _, value := range batch {
		if minValue.IsNull() || typ.Compare(value, minValue) < 0 {
			minValue = value
		}
		if maxValue.IsNull() || typ.Compare(value, maxValue) > 0 {
			maxValue = value
		}
	}

	// Order the expected values the same way sorting the buffer orders the
	// actual ones.
	sortFunc(typ, batch)
	sort.Sort(buffer)

	page := buffer.ColumnBuffers()[0].Page()
	numValues := page.NumValues()
	if numValues != int64(len(batch)) {
		t.Fatalf("number of values mistmatch: want=%d got=%d", len(batch), numValues)
	}

	numNulls := page.NumNulls()
	if numNulls != 0 {
		t.Fatalf("number of nulls mismatch: want=0 got=%d", numNulls)
	}

	min, max, hasBounds := page.Bounds()
	if !hasBounds && numRows > 0 {
		t.Fatal("page bounds are missing")
	}
	if !parquet.Equal(min, minValue) {
		t.Fatalf("min value mismatch: want=%v got=%v", minValue, min)
	}
	if !parquet.Equal(max, maxValue) {
		t.Fatalf("max value mismatch: want=%v got=%v", maxValue, max)
	}

	// We write a single value per row, so num values = num rows for all pages
	// including repeated ones, which makes it OK to slice the pages using the
	// number of values as a proxy for the row indexes.
	halfValues := numValues / 2

	for _, test := range [...]struct {
		scenario string
		values   []parquet.Value
		reader   parquet.ValueReader
	}{
		{"page", batch, page.Values()},
		{"head", batch[:halfValues], page.Slice(0, halfValues).Values()},
		{"tail", batch[halfValues:], page.Slice(halfValues, numValues).Values()},
	} {
		v := [1]parquet.Value{}
		i := 0
		for {
			n, err := test.reader.ReadValues(v[:])
			if n > 0 {
				if n != 1 {
					t.Fatalf("reading value from %q reader returned the wrong count: want=1 got=%d", test.scenario, n)
				}
				if i < len(test.values) {
					if !parquet.Equal(v[0], test.values[i]) {
						t.Fatalf("%q value at index %d mismatches: want=%v got=%v", test.scenario, i, test.values[i], v[0])
					}
				}
				i++
			}
			if err != nil {
				if err == io.EOF {
					break
				}
				t.Fatalf("reading value from %q reader: %v", test.scenario, err)
			}
		}
		if i != len(test.values) {
			t.Errorf("wrong number of values read from %q reader: want=%d got=%d", test.scenario, len(test.values), i)
		}
	}
}
// TestBufferGenerateBloomFilters checks, via quickCheck, that when a writer
// is configured with split-block bloom filters on all three columns, every
// written value is reported as present by the corresponding column's filter
// (bloom filters may yield false positives but never false negatives).
func TestBufferGenerateBloomFilters(t *testing.T) {
	type Point3D struct {
		X float64
		Y float64
		Z float64
	}
	f := func(rows []Point3D) bool {
		if len(rows) == 0 { // TODO: support writing files with no rows
			return true
		}

		output := new(bytes.Buffer)
		buffer := parquet.NewBuffer()
		writer := parquet.NewWriter(output,
			parquet.BloomFilters(
				parquet.SplitBlockFilter(10, "X"),
				parquet.SplitBlockFilter(10, "Y"),
				parquet.SplitBlockFilter(10, "Z"),
			),
		)
		for i := range rows {
			buffer.Write(&rows[i])
		}
		_, err := copyRowsAndClose(writer, buffer.Rows())
		if err != nil {
			t.Error(err)
			return false
		}
		if err := writer.Close(); err != nil {
			t.Error(err)
			return false
		}

		// Re-open the serialized file to inspect the column chunks.
		reader := bytes.NewReader(output.Bytes())
		f, err := parquet.OpenFile(reader, reader.Size())
		if err != nil {
			t.Error(err)
			return false
		}
		rowGroup := f.RowGroups()[0]
		columns := rowGroup.ColumnChunks()
		x := columns[0]
		y := columns[1]
		z := columns[2]

		for i, col := range []parquet.ColumnChunk{x, y, z} {
			if col.BloomFilter() == nil {
				t.Errorf("column %d has no bloom filter despite being configured to have one", i)
				return false
			}
		}

		fx := x.BloomFilter()
		fy := y.BloomFilter()
		fz := z.BloomFilter()

		// A written value missing from its filter is a false negative, which
		// bloom filters must never produce.
		test := func(f parquet.BloomFilter, v float64) bool {
			if ok, err := f.Check(parquet.ValueOf(v)); err != nil {
				t.Errorf("unexpected error checking bloom filter: %v", err)
				return false
			} else if !ok {
				t.Errorf("bloom filter does not contain value %g", v)
				return false
			}
			return true
		}

		for _, row := range rows {
			if !test(fx, row.X) || !test(fy, row.Y) || !test(fz, row.Z) {
				return false
			}
		}

		return true
	}
	if err := quickCheck(f); err != nil {
		t.Error(err)
	}
}
// TestBufferRoundtripNestedRepeated verifies that values nested inside
// doubly-repeated groups roundtrip through a buffer, a writer with small
// pages, and a reader.
//
// Fixes: errors from the write path are now checked instead of silently
// dropped, a non-EOF read error fails the test immediately (previously it
// fell through to a misleading comparison against a zero value), and reading
// more rows than were written now fails instead of panicking on objs[i].
func TestBufferRoundtripNestedRepeated(t *testing.T) {
	type C struct {
		D int
	}
	type B struct {
		C []C
	}
	type A struct {
		B []B
	}

	// Write enough objects to exceed first page
	buffer := parquet.NewBuffer()
	var objs []A
	for i := 0; i < 6; i++ {
		o := A{[]B{{[]C{
			{i},
			{i},
		}}}}
		if err := buffer.Write(&o); err != nil {
			t.Fatal(err)
		}
		objs = append(objs, o)
	}

	buf := new(bytes.Buffer)
	w := parquet.NewWriter(buf, parquet.PageBufferSize(100))
	if _, err := w.WriteRowGroup(buffer); err != nil {
		t.Fatal(err)
	}
	if err := w.Flush(); err != nil {
		t.Fatal(err)
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	file := bytes.NewReader(buf.Bytes())
	r := parquet.NewReader(file)
	for i := 0; ; i++ {
		o := new(A)
		err := r.Read(o)
		if errors.Is(err, io.EOF) {
			if i < len(objs) {
				t.Errorf("too few rows were read: %d<%d", i, len(objs))
			}
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		if i >= len(objs) {
			t.Fatalf("too many rows were read: %d>=%d", i, len(objs))
		}
		if !reflect.DeepEqual(*o, objs[i]) {
			t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o)
		}
	}
}
// TestBufferRoundtripNestedRepeatedPointer is the pointer-field variant of
// TestBufferRoundtripNestedRepeated: optional leaf values (*int, including
// nils) nested in doubly-repeated groups must roundtrip intact.
//
// Fixes for consistency with the sibling test: errors.Is is used instead of
// a bare == io.EOF comparison, write-path errors are checked, a non-EOF read
// error fails immediately, and both too-few and too-many row counts are
// detected instead of panicking on objs[i].
func TestBufferRoundtripNestedRepeatedPointer(t *testing.T) {
	type C struct {
		D *int
	}
	type B struct {
		C []C
	}
	type A struct {
		B []B
	}

	// Write enough objects to exceed first page
	buffer := parquet.NewBuffer()
	var objs []A
	for i := 0; i < 6; i++ {
		j := i
		o := A{[]B{{[]C{
			{&j},
			{nil},
		}}}}
		if err := buffer.Write(&o); err != nil {
			t.Fatal(err)
		}
		objs = append(objs, o)
	}

	buf := new(bytes.Buffer)
	w := parquet.NewWriter(buf, parquet.PageBufferSize(100))
	if _, err := w.WriteRowGroup(buffer); err != nil {
		t.Fatal(err)
	}
	if err := w.Flush(); err != nil {
		t.Fatal(err)
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	file := bytes.NewReader(buf.Bytes())
	r := parquet.NewReader(file)
	for i := 0; ; i++ {
		o := new(A)
		err := r.Read(o)
		if errors.Is(err, io.EOF) {
			if i < len(objs) {
				t.Errorf("too few rows were read: %d<%d", i, len(objs))
			}
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		if i >= len(objs) {
			t.Fatalf("too many rows were read: %d>=%d", i, len(objs))
		}
		if !reflect.DeepEqual(*o, objs[i]) {
			t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o)
		}
	}
}
// TestRoundtripNestedRepeatedBytes verifies that byte-slice leaves nested in
// a repeated group roundtrip through a writer with small pages and a reader.
//
// Fixes: errors from w.Write and w.Close are now checked, the read loop uses
// indexed iteration (avoiding taking the address of the range variable), a
// non-EOF read error fails immediately, and reading more rows than written
// fails instead of panicking on objs[i].
func TestRoundtripNestedRepeatedBytes(t *testing.T) {
	type B struct {
		C []byte
	}
	type A struct {
		A string
		B []B
	}

	var objs []A
	for i := 0; i < 2; i++ {
		o := A{
			"test" + strconv.Itoa(i),
			[]B{
				{[]byte{byte(i)}},
			},
		}
		objs = append(objs, o)
	}

	buf := new(bytes.Buffer)
	w := parquet.NewWriter(buf, parquet.PageBufferSize(100))
	for i := range objs {
		if err := w.Write(&objs[i]); err != nil {
			t.Fatal(err)
		}
	}
	if err := w.Close(); err != nil {
		t.Fatal(err)
	}

	file := bytes.NewReader(buf.Bytes())
	r := parquet.NewReader(file)
	for i := 0; ; i++ {
		o := new(A)
		err := r.Read(o)
		if errors.Is(err, io.EOF) {
			if i < len(objs) {
				t.Errorf("too few rows were read: %d<%d", i, len(objs))
			}
			break
		}
		if err != nil {
			t.Fatal(err)
		}
		if i >= len(objs) {
			t.Fatalf("too many rows were read: %d>=%d", i, len(objs))
		}
		if !reflect.DeepEqual(*o, objs[i]) {
			t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o)
		}
	}
}
// TestBufferSeekToRow checks that a reader can seek directly to the second
// row of a written row group and reconstruct it.
//
// NOTE(review): the errors returned by buffer.Write, w.WriteRowGroup,
// w.Flush, and w.Close are ignored; a failure in any of them would only
// surface as a confusing read error below — consider checking them.
func TestBufferSeekToRow(t *testing.T) {
	type B struct {
		I int
		C []string
	}
	type A struct {
		B []B
	}

	buffer := parquet.NewBuffer()
	var objs []A
	for i := 0; i < 2; i++ {
		o := A{
			B: []B{
				{I: i, C: []string{"foo", strconv.Itoa(i)}},
				{I: i + 1, C: []string{"bar", strconv.Itoa(i + 1)}},
			},
		}
		buffer.Write(&o)
		objs = append(objs, o)
	}

	buf := new(bytes.Buffer)
	w := parquet.NewWriter(buf)
	w.WriteRowGroup(buffer)
	w.Flush()
	w.Close()

	file := bytes.NewReader(buf.Bytes())
	r := parquet.NewReader(file)

	// Seek past row 0 and read row 1 directly.
	i := 1
	o := new(A)
	if err := r.SeekToRow(int64(i)); err != nil {
		t.Fatal(err)
	}
	if err := r.Read(o); err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(*o, objs[i]) {
		t.Errorf("points mismatch at row index %d: want=%v got=%v", i, objs[i], o)
	}
}
// TestStruct models a single optional, dictionary-encoded string column; it
// is shared by TestOptionalDictWriteRowGroup and TestNullsSortFirst.
type TestStruct struct {
	A *string `parquet:"a,optional,dict"`
}
// TestOptionalDictWriteRowGroup checks that a buffer holding a mix of null
// and non-null values of an optional dictionary-encoded column can be
// written out as a row group without error.
func TestOptionalDictWriteRowGroup(t *testing.T) {
	schema := parquet.SchemaOf(&TestStruct{})

	str1 := "test1"
	str2 := "test2"
	records := []*TestStruct{
		{A: nil},
		{A: &str1},
		{A: nil},
		{A: &str2},
		{A: nil},
	}

	buf := parquet.NewBuffer(schema)
	for _, rec := range records {
		if _, err := buf.WriteRows([]parquet.Row{schema.Deconstruct(nil, rec)}); err != nil {
			t.Fatal(err)
		}
	}

	w := parquet.NewWriter(bytes.NewBuffer(nil))
	if _, err := w.WriteRowGroup(buf); err != nil {
		t.Fatal(err)
	}
}
// TestNullsSortFirst writes records with and without a value in the optional
// column, sorts the buffer ascending with the NullsFirst wrapper, and checks
// that a null record comes out first.
func TestNullsSortFirst(t *testing.T) {
	s := parquet.SchemaOf(&TestStruct{})
	str1 := "test1"
	str2 := "test2"
	records := []*TestStruct{
		{A: &str1},
		{A: nil},
		{A: &str2},
	}
	buf := parquet.NewBuffer(
		s,
		parquet.SortingRowGroupConfig(parquet.SortingColumns(parquet.NullsFirst(parquet.Ascending(s.Columns()[0][0])))),
	)
	for _, rec := range records {
		row := s.Deconstruct(nil, rec)
		_, err := buf.WriteRows([]parquet.Row{row})
		if err != nil {
			t.Fatal(err)
		}
	}
	sort.Sort(buf)

	// Read the sorted rows back and reconstruct them into structs.
	rows := buf.Rows()
	defer rows.Close()
	rowBuf := make([]parquet.Row, len(records))
	if _, err := rows.ReadRows(rowBuf); err != nil {
		t.Fatal(err)
	}
	resultRecords := make([]TestStruct, len(records))
	for i, r := range rowBuf {
		if err := s.Reconstruct(&resultRecords[i], r); err != nil {
			t.Fatal(err)
		}
	}

	if resultRecords[0].A != nil {
		t.Fatal("expected null to sort first, but found", resultRecords)
	}
}
// generateBenchmarkBufferRows deconstructs n pseudo-random rows of
// benchmarkRowType (seeded deterministically) and returns them along with
// their schema.
func generateBenchmarkBufferRows(n int) (*parquet.Schema, []parquet.Row) {
	var model benchmarkRowType
	schema := parquet.SchemaOf(&model)
	prng := rand.New(rand.NewSource(0))

	rows := make([]parquet.Row, n)
	for i := range rows {
		io.ReadFull(prng, model.ID[:])
		model.Value = prng.Float64()
		rows[i] = schema.Deconstruct(make(parquet.Row, 0, 2), &model)
	}
	return schema, rows
}
// BenchmarkBufferReadRows100x measures the throughput of reading rows back
// from a fully populated buffer in steps of benchmarkRowsPerStep, rewinding
// with SeekToRow(0) whenever the end of the buffer is reached.
func BenchmarkBufferReadRows100x(b *testing.B) {
	schema, rows := generateBenchmarkBufferRows(benchmarkNumRows)
	buffer := parquet.NewBuffer(schema)

	for i := 0; i < len(rows); i += benchmarkRowsPerStep {
		j := i + benchmarkRowsPerStep
		if _, err := buffer.WriteRows(rows[i:j]); err != nil {
			b.Fatal(err)
		}
	}

	bufferRows := buffer.Rows()
	defer bufferRows.Close()

	benchmarkRowsPerSecond(b, func() int {
		// The rows slice doubles as the read destination since its contents
		// are no longer needed after the buffer has been populated.
		n, err := bufferRows.ReadRows(rows[:benchmarkRowsPerStep])
		if err != nil {
			if errors.Is(err, io.EOF) {
				err = bufferRows.SeekToRow(0)
			}
			if err != nil {
				b.Fatal(err)
			}
		}
		return n
	})
}
// BenchmarkBufferWriteRows100x measures the throughput of writing pre-built
// rows to a buffer in steps of benchmarkRowsPerStep, resetting the buffer
// every time all rows have been written so it does not grow unbounded.
func BenchmarkBufferWriteRows100x(b *testing.B) {
	schema, rows := generateBenchmarkBufferRows(benchmarkNumRows)
	buffer := parquet.NewBuffer(schema)

	i := 0
	benchmarkRowsPerSecond(b, func() int {
		n, err := buffer.WriteRows(rows[i : i+benchmarkRowsPerStep])
		if err != nil {
			b.Fatal(err)
		}

		i += benchmarkRowsPerStep
		i %= benchmarkNumRows

		if i == 0 {
			buffer.Reset()
		}
		return n
	})
}
================================================
FILE: column.go
================================================
package parquet
import (
"encoding/binary"
"fmt"
"io"
"reflect"
"github.com/segmentio/parquet-go/compress"
"github.com/segmentio/parquet-go/deprecated"
"github.com/segmentio/parquet-go/encoding"
"github.com/segmentio/parquet-go/format"
"github.com/segmentio/parquet-go/internal/unsafecast"
)
// Column represents a column in a parquet file.
//
// Methods of Column values are safe to call concurrently from multiple
// goroutines.
//
// Column instances satisfy the Node interface.
type Column struct {
	typ    Type
	file   *File
	schema *format.SchemaElement
	order  *format.ColumnOrder
	path   columnPath
	// columns holds the child columns; it is empty for leaf columns.
	columns []*Column
	// chunks holds one column chunk per row group of the file.
	chunks []*format.ColumnChunk
	// columnIndex/offsetIndex hold one entry per row group when the file
	// carries column/offset indexes, and are empty otherwise.
	columnIndex []*format.ColumnIndex
	offsetIndex []*format.OffsetIndex
	encoding    encoding.Encoding
	compression compress.Codec
	// depth, maxRepetitionLevel, and maxDefinitionLevel are assigned by
	// setLevels when the column tree is opened.
	depth              int8
	maxRepetitionLevel byte
	maxDefinitionLevel byte
	// index is the leaf column position in a row, or -1 for group columns.
	index int16
}
// Type returns the type of the column.
//
// The returned value is unspecified if c is not a leaf column.
func (c *Column) Type() Type { return c.typ }

// Optional returns true if the column is optional.
func (c *Column) Optional() bool { return schemaRepetitionTypeOf(c.schema) == format.Optional }

// Repeated returns true if the column may repeat.
func (c *Column) Repeated() bool { return schemaRepetitionTypeOf(c.schema) == format.Repeated }

// Required returns true if the column is required.
func (c *Column) Required() bool { return schemaRepetitionTypeOf(c.schema) == format.Required }

// Leaf returns true if c is a leaf column; only leaf columns carry a
// non-negative index.
func (c *Column) Leaf() bool { return c.index >= 0 }

// Fields returns the list of fields on the column, one per child column.
func (c *Column) Fields() []Field {
	fields := make([]Field, len(c.columns))
	for i, column := range c.columns {
		fields[i] = column
	}
	return fields
}

// Encoding returns the encodings used by this column.
func (c *Column) Encoding() encoding.Encoding { return c.encoding }

// Compression returns the compression codecs used by this column.
func (c *Column) Compression() compress.Codec { return c.compression }

// Path of the column in the parquet schema; the root element is omitted.
func (c *Column) Path() []string { return c.path[1:] }

// Name returns the column name.
func (c *Column) Name() string { return c.schema.Name }

// Columns returns the list of child columns.
//
// The method returns the same slice across multiple calls, the program must
// treat it as a read-only value.
func (c *Column) Columns() []*Column { return c.columns }

// Column returns the child column matching the given name, or nil when no
// child has that name.
func (c *Column) Column(name string) *Column {
	for _, child := range c.columns {
		if child.Name() == name {
			return child
		}
	}
	return nil
}
// Pages returns a reader exposing all pages in this column, across row groups.
func (c *Column) Pages() Pages {
	// Non-leaf columns have no pages of their own.
	if c.index < 0 {
		return emptyPages{}
	}
	r := &columnPages{
		pages: make([]filePages, len(c.file.rowGroups)),
	}
	for i := range r.pages {
		// Each element reads the chunk of this column in the i-th row group.
		r.pages[i].init(c.file.rowGroups[i].(*fileRowGroup).columns[c.index].(*fileColumnChunk))
	}
	return r
}
// columnPages reads the pages of a single column across all the row groups
// of a file, in row group order.
type columnPages struct {
	pages []filePages // one page reader per row group
	index int         // index of the reader currently being consumed
}
// ReadPage returns the next page of the column, moving to the next row group
// when the current one is exhausted, and io.EOF once all row groups have
// been consumed.
func (c *columnPages) ReadPage() (Page, error) {
	for c.index < len(c.pages) {
		page, err := c.pages[c.index].ReadPage()
		if err != io.EOF {
			return page, err
		}
		c.index++
	}
	return nil, io.EOF
}
// SeekToRow positions the reader on the page containing the given absolute
// row index, and rewinds every subsequent row group to its first row.
//
// Fixes two defects in the previous implementation: the skip condition was
// inverted (it advanced while NumRows >= rowIndex instead of while
// rowIndex >= NumRows, skipping past every row group for most inputs), and
// the rewind loop started at c.pages[c.index], immediately seeking the
// freshly positioned target page back to row zero.
func (c *columnPages) SeekToRow(rowIndex int64) error {
	c.index = 0

	// Skip the row groups that precede the target row, consuming their row
	// counts from the relative index.
	for c.index < len(c.pages) && rowIndex >= c.pages[c.index].chunk.rowGroup.NumRows {
		rowIndex -= c.pages[c.index].chunk.rowGroup.NumRows
		c.index++
	}

	if c.index < len(c.pages) {
		if err := c.pages[c.index].SeekToRow(rowIndex); err != nil {
			return err
		}
		// Later row groups restart from their first row when reached.
		for i := c.index + 1; i < len(c.pages); i++ {
			if err := c.pages[i].SeekToRow(0); err != nil {
				return err
			}
		}
	}
	return nil
}
// Close closes every underlying page reader and resets the cursor, returning
// the last error encountered (if any).
func (c *columnPages) Close() error {
	var err error
	for i := range c.pages {
		if e := c.pages[i].Close(); e != nil {
			err = e
		}
	}
	c.pages, c.index = nil, 0
	return err
}
// Depth returns the position of the column relative to the root.
func (c *Column) Depth() int { return int(c.depth) }

// MaxRepetitionLevel returns the maximum value of repetition levels on this
// column.
func (c *Column) MaxRepetitionLevel() int { return int(c.maxRepetitionLevel) }

// MaxDefinitionLevel returns the maximum value of definition levels on this
// column.
func (c *Column) MaxDefinitionLevel() int { return int(c.maxDefinitionLevel) }

// Index returns the position of the column in a row. Only leaf columns have a
// column index, the method returns -1 when called on non-leaf columns.
func (c *Column) Index() int { return int(c.index) }

// GoType returns the Go type that best represents the parquet column.
func (c *Column) GoType() reflect.Type { return goTypeOf(c) }

// Value returns the sub-value in base for the child column at the given
// index.
//
// NOTE(review): base is indexed as a map keyed by the column name — confirm
// that callers always pass a map value here.
func (c *Column) Value(base reflect.Value) reflect.Value {
	return base.MapIndex(reflect.ValueOf(&c.schema.Name).Elem())
}

// String returns a human-readable string representation of the column.
func (c *Column) String() string { return c.path.String() + ": " + sprint(c.Name(), c) }
// forEachLeaf invokes do for every leaf column of the subtree rooted at c,
// in depth-first order.
func (c *Column) forEachLeaf(do func(*Column)) {
	if len(c.columns) == 0 {
		do(c)
		return
	}
	for _, child := range c.columns {
		child.forEachLeaf(do)
	}
}
// openColumns builds the Column tree of file from its schema metadata, then
// assigns depth, repetition/definition levels, and leaf column indexes.
func openColumns(file *File) (*Column, error) {
	cl := columnLoader{}

	c, err := cl.open(file, nil)
	if err != nil {
		return nil, err
	}

	// Validate that there aren't extra entries in the row group columns,
	// which would otherwise indicate that there are dangling data pages
	// in the file.
	for index, rowGroup := range file.metadata.RowGroups {
		if cl.rowGroupColumnIndex != len(rowGroup.Columns) {
			return nil, fmt.Errorf("row group at index %d contains %d columns but %d were referenced by the column schemas",
				index, len(rowGroup.Columns), cl.rowGroupColumnIndex)
		}
	}

	_, err = c.setLevels(0, 0, 0, 0)
	return c, err
}
// setLevels walks the column tree assigning the depth, maximum repetition
// level, maximum definition level, and leaf column index of every column.
//
// depth, repetition, and definition are inherited from the parent; index is
// the next leaf column index to assign. It returns the next available leaf
// index after the subtree has been processed, or an error when one of the
// representable limits would be exceeded.
func (c *Column) setLevels(depth, repetition, definition, index int) (int, error) {
	if depth > MaxColumnDepth {
		return -1, fmt.Errorf("cannot represent parquet columns with more than %d nested levels: %s", MaxColumnDepth, c.path)
	}
	if index > MaxColumnIndex {
		return -1, fmt.Errorf("cannot represent parquet rows with more than %d columns: %s", MaxColumnIndex, c.path)
	}
	if repetition > MaxRepetitionLevel {
		return -1, fmt.Errorf("cannot represent parquet columns with more than %d repetition levels: %s", MaxRepetitionLevel, c.path)
	}
	if definition > MaxDefinitionLevel {
		return -1, fmt.Errorf("cannot represent parquet columns with more than %d definition levels: %s", MaxDefinitionLevel, c.path)
	}

	// Optional columns add a definition level; repeated columns add both a
	// repetition and a definition level.
	switch schemaRepetitionTypeOf(c.schema) {
	case format.Optional:
		definition++
	case format.Repeated:
		repetition++
		definition++
	}

	c.depth = int8(depth)
	c.maxRepetitionLevel = byte(repetition)
	c.maxDefinitionLevel = byte(definition)
	depth++

	// Only leaf columns receive a column index; groups are marked with -1.
	if len(c.columns) > 0 {
		c.index = -1
	} else {
		c.index = int16(index)
		index++
	}

	var err error
	for _, child := range c.columns {
		if index, err = child.setLevels(depth, repetition, definition, index); err != nil {
			return -1, err
		}
	}
	return index, nil
}
// columnLoader tracks the cursor positions used while walking the flattened
// schema of a parquet file to reconstruct the column tree.
type columnLoader struct {
	schemaIndex         int // next element of file.metadata.Schema to consume
	columnOrderIndex    int // next element of file.metadata.ColumnOrders to consume
	rowGroupColumnIndex int // index of the next leaf column within each row group
}
// open builds the Column rooted at the schema element at the loader's current
// schemaIndex, consuming schema elements (and, for leaf columns, column
// orders, column chunks, and index pages) as it recurses through the tree.
func (cl *columnLoader) open(file *File, path []string) (*Column, error) {
	c := &Column{
		file:   file,
		schema: &file.metadata.Schema[cl.schemaIndex],
	}
	c.path = columnPath(path).append(c.schema.Name)

	cl.schemaIndex++
	numChildren := int(c.schema.NumChildren)

	if numChildren == 0 {
		c.typ = schemaElementTypeOf(c.schema)

		if cl.columnOrderIndex < len(file.metadata.ColumnOrders) {
			c.order = &file.metadata.ColumnOrders[cl.columnOrderIndex]
			cl.columnOrderIndex++
		}

		rowGroups := file.metadata.RowGroups
		rowGroupColumnIndex := cl.rowGroupColumnIndex
		cl.rowGroupColumnIndex++

		c.chunks = make([]*format.ColumnChunk, 0, len(rowGroups))
		c.columnIndex = make([]*format.ColumnIndex, 0, len(rowGroups))
		c.offsetIndex = make([]*format.OffsetIndex, 0, len(rowGroups))

		for i, rowGroup := range rowGroups {
			if rowGroupColumnIndex >= len(rowGroup.Columns) {
				return nil, fmt.Errorf("row group at index %d does not have enough columns", i)
			}
			c.chunks = append(c.chunks, &rowGroup.Columns[rowGroupColumnIndex])
		}

		// The column and offset index pages hold one entry per column chunk,
		// laid out row group by row group. The previous code indexed these
		// slices with rowGroupColumnIndex alone, which made every row group
		// alias the entry of the first row group; account for the row group
		// offset when selecting the entry.
		if len(file.columnIndexes) > 0 {
			for i, rowGroup := range rowGroups {
				offset := i*len(rowGroup.Columns) + rowGroupColumnIndex
				if offset >= len(file.columnIndexes) {
					return nil, fmt.Errorf("row group at index %d does not have enough column index pages", i)
				}
				c.columnIndex = append(c.columnIndex, &file.columnIndexes[offset])
			}
		}

		if len(file.offsetIndexes) > 0 {
			for i, rowGroup := range rowGroups {
				offset := i*len(rowGroup.Columns) + rowGroupColumnIndex
				if offset >= len(file.offsetIndexes) {
					return nil, fmt.Errorf("row group at index %d does not have enough offset index pages", i)
				}
				c.offsetIndex = append(c.offsetIndex, &file.offsetIndexes[offset])
			}
		}

		if len(c.chunks) > 0 {
			// Pick the encoding and compression codec of the first chunk.
			//
			// Technically each column chunk may use a different compression
			// codec, and each page of the column chunk might have a different
			// encoding. Exposing these details does not provide a lot of value
			// to the end user.
			//
			// Programs that wish to determine the encoding and compression of
			// each page of the column should iterate through the pages and read
			// the page headers to determine which compression and encodings are
			// applied.
			for _, encoding := range c.chunks[0].MetaData.Encoding {
				if c.encoding == nil {
					c.encoding = LookupEncoding(encoding)
				}
				if encoding != format.Plain && encoding != format.RLE {
					c.encoding = LookupEncoding(encoding)
					break
				}
			}
			c.compression = LookupCompressionCodec(c.chunks[0].MetaData.Codec)
		}

		return c, nil
	}

	// Group column: recurse into each child, consuming their schema elements.
	c.typ = &groupType{}
	c.columns = make([]*Column, numChildren)

	for i := range c.columns {
		if cl.schemaIndex >= len(file.metadata.Schema) {
			return nil, fmt.Errorf("column %q has more children than there are schemas in the file: %d > %d",
				c.schema.Name, cl.schemaIndex+1, len(file.metadata.Schema))
		}

		var err error
		c.columns[i], err = cl.open(file, c.path)
		if err != nil {
			return nil, fmt.Errorf("%s: %w", c.schema.Name, err)
		}
	}

	return c, nil
}
// schemaElementTypeOf maps a schema element from the file metadata to the
// Type implementation that represents it, preferring the logical type, then
// the (deprecated) converted type, then the raw physical type.
func schemaElementTypeOf(s *format.SchemaElement) Type {
	if lt := s.LogicalType; lt != nil {
		// A logical type exists, the Type interface implementations in this
		// package are all based on the logical parquet types declared in the
		// format sub-package so we can return them directly via a pointer type
		// conversion.
		switch {
		case lt.UTF8 != nil:
			return (*stringType)(lt.UTF8)
		case lt.Map != nil:
			return (*mapType)(lt.Map)
		case lt.List != nil:
			return (*listType)(lt.List)
		case lt.Enum != nil:
			return (*enumType)(lt.Enum)
		case lt.Decimal != nil:
			// A parquet decimal can be one of several different physical types.
			if t := s.Type; t != nil {
				var typ Type
				switch kind := Kind(*s.Type); kind {
				case Int32:
					typ = Int32Type
				case Int64:
					typ = Int64Type
				case FixedLenByteArray:
					if s.TypeLength == nil {
						panic("DECIMAL using FIXED_LEN_BYTE_ARRAY must specify a length")
					}
					typ = FixedLenByteArrayType(int(*s.TypeLength))
				default:
					panic("DECIMAL must be of type INT32, INT64, or FIXED_LEN_BYTE_ARRAY but got " + kind.String())
				}
				return &decimalType{
					decimal: *lt.Decimal,
					Type:    typ,
				}
			}
			// NOTE(review): a DECIMAL logical type without a physical type
			// falls through to the converted/physical handling below —
			// confirm this fallback is intended.
		case lt.Date != nil:
			return (*dateType)(lt.Date)
		case lt.Time != nil:
			return (*timeType)(lt.Time)
		case lt.Timestamp != nil:
			return (*timestampType)(lt.Timestamp)
		case lt.Integer != nil:
			return (*intType)(lt.Integer)
		case lt.Unknown != nil:
			return (*nullType)(lt.Unknown)
		case lt.Json != nil:
			return (*jsonType)(lt.Json)
		case lt.Bson != nil:
			return (*bsonType)(lt.Bson)
		case lt.UUID != nil:
			return (*uuidType)(lt.UUID)
		}
	}

	if ct := s.ConvertedType; ct != nil {
		// This column contains no logical type but has a converted type, it
		// was likely created by an older parquet writer. Convert the legacy
		// type representation to the equivalent logical parquet type.
		switch *ct {
		case deprecated.UTF8:
			return &stringType{}
		case deprecated.Map:
			return &mapType{}
		case deprecated.MapKeyValue:
			return &groupType{}
		case deprecated.List:
			return &listType{}
		case deprecated.Enum:
			return &enumType{}
		case deprecated.Decimal:
			if s.Scale != nil && s.Precision != nil {
				// A parquet decimal can be one of several different physical types.
				if t := s.Type; t != nil {
					var typ Type
					switch kind := Kind(*s.Type); kind {
					case Int32:
						typ = Int32Type
					case Int64:
						typ = Int64Type
					case FixedLenByteArray:
						if s.TypeLength == nil {
							panic("DECIMAL using FIXED_LEN_BYTE_ARRAY must specify a length")
						}
						typ = FixedLenByteArrayType(int(*s.TypeLength))
					case ByteArray:
						typ = ByteArrayType
					default:
						panic("DECIMAL must be of type INT32, INT64, BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY but got " + kind.String())
					}
					return &decimalType{
						decimal: format.DecimalType{
							Scale:     *s.Scale,
							Precision: *s.Precision,
						},
						Type: typ,
					}
				}
			}
		case deprecated.Date:
			return &dateType{}
		case deprecated.TimeMillis:
			return &timeType{IsAdjustedToUTC: true, Unit: Millisecond.TimeUnit()}
		case deprecated.TimeMicros:
			return &timeType{IsAdjustedToUTC: true, Unit: Microsecond.TimeUnit()}
		case deprecated.TimestampMillis:
			return &timestampType{IsAdjustedToUTC: true, Unit: Millisecond.TimeUnit()}
		case deprecated.TimestampMicros:
			return &timestampType{IsAdjustedToUTC: true, Unit: Microsecond.TimeUnit()}
		case deprecated.Uint8:
			return &unsignedIntTypes[0]
		case deprecated.Uint16:
			return &unsignedIntTypes[1]
		case deprecated.Uint32:
			return &unsignedIntTypes[2]
		case deprecated.Uint64:
			return &unsignedIntTypes[3]
		case deprecated.Int8:
			return &signedIntTypes[0]
		case deprecated.Int16:
			return &signedIntTypes[1]
		case deprecated.Int32:
			return &signedIntTypes[2]
		case deprecated.Int64:
			return &signedIntTypes[3]
		case deprecated.Json:
			return &jsonType{}
		case deprecated.Bson:
			return &bsonType{}
		case deprecated.Interval:
			// TODO
		}
	}

	if t := s.Type; t != nil {
		// The column only has a physical type, convert it to one of the
		// primitive types supported by this package.
		switch kind := Kind(*t); kind {
		case Boolean:
			return BooleanType
		case Int32:
			return Int32Type
		case Int64:
			return Int64Type
		case Int96:
			return Int96Type
		case Float:
			return FloatType
		case Double:
			return DoubleType
		case ByteArray:
			return ByteArrayType
		case FixedLenByteArray:
			if s.TypeLength != nil {
				return FixedLenByteArrayType(int(*s.TypeLength))
			}
		}
	}

	// If we reach this point, we are likely reading a parquet column that was
	// written with a non-standard type or is in a newer version of the format
	// than this package supports.
	return &nullType{}
}
// schemaRepetitionTypeOf returns the repetition type declared by the schema
// element, defaulting to REQUIRED when none is present.
func schemaRepetitionTypeOf(s *format.SchemaElement) format.FieldRepetitionType {
	if t := s.RepetitionType; t != nil {
		return *t
	}
	return format.Required
}
// decompress decodes compressedPageData into a reference-counted buffer
// obtained from the shared pool, sized from uncompressedPageSize. On failure
// the buffer is released and a nil page is returned along with the error.
func (c *Column) decompress(compressedPageData []byte, uncompressedPageSize int32) (*buffer, error) {
	page := buffers.get(int(uncompressedPageSize))
	decoded, err := c.compression.Decode(page.data, compressedPageData)
	if err != nil {
		page.unref()
		return nil, err
	}
	page.data = decoded
	return page, nil
}
// DecodeDataPageV1 decodes a data page from the header, compressed data, and
// optional dictionary passed as arguments.
//
// The page bytes are wrapped in a non-pooled buffer and -1 is passed as the
// size hint used when decompressing the page.
func (c *Column) DecodeDataPageV1(header DataPageHeaderV1, page []byte, dict Dictionary) (Page, error) {
	return c.decodeDataPageV1(header, &buffer{data: page}, dict, -1)
}
// decodeDataPageV1 decodes a v1 data page: the whole page (levels included)
// is decompressed first, then the repetition and definition levels are
// decoded before the remaining bytes are handed to decodeDataPage.
func (c *Column) decodeDataPageV1(header DataPageHeaderV1, page *buffer, dict Dictionary, size int32) (Page, error) {
	var pageData = page.data
	var err error

	if isCompressed(c.compression) {
		if page, err = c.decompress(pageData, size); err != nil {
			return nil, fmt.Errorf("decompressing data page v1: %w", err)
		}
		defer page.unref()
		pageData = page.data
	}

	var numValues = int(header.NumValues())
	var repetitionLevels *buffer
	var definitionLevels *buffer

	if c.maxRepetitionLevel > 0 {
		encoding := lookupLevelEncoding(header.RepetitionLevelEncoding(), c.maxRepetitionLevel)
		repetitionLevels, pageData, err = decodeLevelsV1(encoding, numValues, pageData)
		if err != nil {
			return nil, fmt.Errorf("decoding repetition levels of data page v1: %w", err)
		}
		defer repetitionLevels.unref()
	}

	if c.maxDefinitionLevel > 0 {
		encoding := lookupLevelEncoding(header.DefinitionLevelEncoding(), c.maxDefinitionLevel)
		definitionLevels, pageData, err = decodeLevelsV1(encoding, numValues, pageData)
		if err != nil {
			return nil, fmt.Errorf("decoding definition levels of data page v1: %w", err)
		}
		defer definitionLevels.unref()

		// Data pages v1 did not embed the number of null values,
		// so we have to compute it from the definition levels.
		numValues -= countLevelsNotEqual(definitionLevels.data, c.maxDefinitionLevel)
	}

	return c.decodeDataPage(header, numValues, repetitionLevels, definitionLevels, page, pageData, dict)
}
// DecodeDataPageV2 decodes a data page from the header, compressed data, and
// optional dictionary passed as arguments.
//
// The page bytes are wrapped in a non-pooled buffer and -1 is passed as the
// size hint used when decompressing the page.
func (c *Column) DecodeDataPageV2(header DataPageHeaderV2, page []byte, dict Dictionary) (Page, error) {
	return c.decodeDataPageV2(header, &buffer{data: page}, dict, -1)
}
// decodeDataPageV2 decodes a v2 data page. Unlike v1 pages, the repetition
// and definition levels are stored outside the compressed region, so they
// are decoded (or skipped) before the page data is decompressed.
func (c *Column) decodeDataPageV2(header DataPageHeaderV2, page *buffer, dict Dictionary, size int32) (Page, error) {
	var numValues = int(header.NumValues())
	var pageData = page.data
	var err error
	var repetitionLevels *buffer
	var definitionLevels *buffer

	if length := header.RepetitionLevelsByteLength(); length > 0 {
		if c.maxRepetitionLevel == 0 {
			// In some cases we've observed files which have a non-zero
			// repetition level despite the column not being repeated
			// (nor nested within a repeated column).
			//
			// See https://github.com/apache/parquet-testing/pull/24
			pageData, err = skipLevelsV2(pageData, length)
		} else {
			encoding := lookupLevelEncoding(header.RepetitionLevelEncoding(), c.maxRepetitionLevel)
			repetitionLevels, pageData, err = decodeLevelsV2(encoding, numValues, pageData, length)
		}
		if err != nil {
			// Wrap the error actually returned above; the previous code
			// wrapped io.ErrUnexpectedEOF unconditionally, discarding the
			// real cause of the failure.
			return nil, fmt.Errorf("decoding repetition levels of data page v2: %w", err)
		}
		if repetitionLevels != nil {
			defer repetitionLevels.unref()
		}
	}

	if length := header.DefinitionLevelsByteLength(); length > 0 {
		if c.maxDefinitionLevel == 0 {
			pageData, err = skipLevelsV2(pageData, length)
		} else {
			encoding := lookupLevelEncoding(header.DefinitionLevelEncoding(), c.maxDefinitionLevel)
			definitionLevels, pageData, err = decodeLevelsV2(encoding, numValues, pageData, length)
		}
		if err != nil {
			return nil, fmt.Errorf("decoding definition levels of data page v2: %w", err)
		}
		if definitionLevels != nil {
			defer definitionLevels.unref()
		}
	}

	if isCompressed(c.compression) && header.IsCompressed() {
		if page, err = c.decompress(pageData, size); err != nil {
			return nil, fmt.Errorf("decompressing data page v2: %w", err)
		}
		defer page.unref()
		pageData = page.data
	}

	// V2 headers record the null count explicitly; numValues passed down
	// only counts non-null entries.
	numValues -= int(header.NumNulls())
	return c.decodeDataPage(header, numValues, repetitionLevels, definitionLevels, page, pageData, dict)
}
// decodeDataPage decodes the values of a data page (v1 or v2) into a Page,
// wiring in the repetition/definition levels and optional dictionary.
func (c *Column) decodeDataPage(header DataPageHeader, numValues int, repetitionLevels, definitionLevels, page *buffer, data []byte, dict Dictionary) (Page, error) {
	pageEncoding := LookupEncoding(header.Encoding())
	pageType := c.Type()

	if isDictionaryEncoding(pageEncoding) {
		// In some legacy configurations, the PLAIN_DICTIONARY encoding is used
		// on data page headers to indicate that the page contains indexes into
		// the dictionary page, but the page is still encoded using the RLE
		// encoding in this case, so we convert it to RLE_DICTIONARY.
		pageEncoding = &RLEDictionary
		pageType = indexedPageType{newIndexedType(pageType, dict)}
	}

	var vbuf, obuf *buffer
	var pageValues []byte
	var pageOffsets []uint32

	// Decode directly into the page buffer when the encoding allows it;
	// otherwise stage the decoded values in a pooled buffer sized from an
	// estimate for this page type.
	if pageEncoding.CanDecodeInPlace() {
		vbuf = page
		pageValues = data
	} else {
		vbuf = buffers.get(pageType.EstimateDecodeSize(numValues, data, pageEncoding))
		defer vbuf.unref()
		pageValues = vbuf.data
	}

	// Page offsets not needed when dictionary-encoded
	if pageType.Kind() == ByteArray && !isDictionaryEncoding(pageEncoding) {
		obuf = buffers.get(4 * (numValues + 1))
		defer obuf.unref()
		pageOffsets = unsafecast.BytesToUint32(obuf.data)
	}

	values := pageType.NewValues(pageValues, pageOffsets)
	values, err := pageType.Decode(values, data, pageEncoding)
	if err != nil {
		return nil, err
	}

	// Wrap the page to expose repetition/definition levels when the column
	// is repeated or optional.
	newPage := pageType.NewPage(c.Index(), numValues, values)
	switch {
	case c.maxRepetitionLevel > 0:
		newPage = newRepeatedPage(
			newPage,
			c.maxRepetitionLevel,
			c.maxDefinitionLevel,
			repetitionLevels.data,
			definitionLevels.data,
		)
	case c.maxDefinitionLevel > 0:
		newPage = newOptionalPage(
			newPage,
			c.maxDefinitionLevel,
			definitionLevels.data,
		)
	}

	return newBufferedPage(newPage, vbuf, obuf, repetitionLevels, definitionLevels), nil
}
// decodeLevelsV1 decodes the level section of a v1 data page, which is
// prefixed by a 4-byte little-endian length. It returns the decoded levels
// and the remaining page bytes.
func decodeLevelsV1(enc encoding.Encoding, numValues int, data []byte) (*buffer, []byte, error) {
	if len(data) < 4 {
		return nil, data, io.ErrUnexpectedEOF
	}
	n := binary.LittleEndian.Uint32(data)
	// Validate the length in uint64 so a corrupted value cannot wrap int on
	// 32-bit platforms and bypass the bounds check, panicking on the slice
	// expressions below.
	if uint64(n) > uint64(len(data)-4) {
		return nil, data, io.ErrUnexpectedEOF
	}
	i := 4
	j := 4 + int(n)
	levels, err := decodeLevels(enc, numValues, data[i:j])
	return levels, data[j:], err
}
// decodeLevelsV2 decodes the level section of a v2 data page, whose byte
// length is given by the page header. It returns the decoded levels and the
// remaining page bytes.
func decodeLevelsV2(enc encoding.Encoding, numValues int, data []byte, length int64) (*buffer, []byte, error) {
	// The length comes from the file and may be corrupted; validate it so
	// data[:length] cannot panic on malformed input.
	if length < 0 || length > int64(len(data)) {
		return nil, data, io.ErrUnexpectedEOF
	}
	levels, err := decodeLevels(enc, numValues, data[:length])
	return levels, data[length:], err
}
// decodeLevels decodes numValues levels from data into a pooled buffer.
// On decode failure the buffer is released and nil is returned with the
// error; on success the buffer is truncated to exactly numValues levels, or
// an error is reported when fewer levels than expected were decoded.
func decodeLevels(enc encoding.Encoding, numValues int, data []byte) (levels *buffer, err error) {
	levels = buffers.get(numValues)
	levels.data, err = enc.DecodeLevels(levels.data, data)
	if err != nil {
		levels.unref()
		levels = nil
	} else {
		switch {
		case len(levels.data) < numValues:
			// Too few levels indicates a corrupted page; note that the
			// buffer is still returned to the caller in this case.
			err = fmt.Errorf("decoding level expected %d values but got only %d", numValues, len(levels.data))
		case len(levels.data) > numValues:
			levels.data = levels.data[:numValues]
		}
	}
	return levels, err
}
func skipLevelsV2(data []byte, length int64) ([]byte, error) {
if length >= int64(len(data)) {
return data, io.ErrUnexpectedEOF
}
return data[length:], nil
}
// DecodeDictionary decodes a data page from the header and compressed data
// passed as arguments.
//
// The page bytes are wrapped in a non-pooled buffer and -1 is passed as the
// size hint used when decompressing the page.
func (c *Column) DecodeDictionary(header DictionaryPageHeader, page []byte) (Dictionary, error) {
	return c.decodeDictionary(header, &buffer{data: page}, -1)
}
// decodeDictionary decodes a dictionary page: it decompresses the page when
// the column is compressed, normalizes the legacy PLAIN_DICTIONARY encoding
// to PLAIN, then decodes the values and builds the column's dictionary.
func (c *Column) decodeDictionary(header DictionaryPageHeader, page *buffer, size int32) (Dictionary, error) {
	pageData := page.data

	if isCompressed(c.compression) {
		var err error
		if page, err = c.decompress(pageData, size); err != nil {
			return nil, fmt.Errorf("decompressing dictionary page: %w", err)
		}
		defer page.unref()
		pageData = page.data
	}

	pageType := c.Type()
	pageEncoding := header.Encoding()
	// Legacy writers declare dictionary pages as PLAIN_DICTIONARY even though
	// the page payload is laid out like a PLAIN page.
	if pageEncoding == format.PlainDictionary {
		pageEncoding = format.Plain
	}

	numValues := int(header.NumValues())
	values := pageType.NewValues(nil, nil)
	values, err := pageType.Decode(values, pageData, LookupEncoding(pageEncoding))
	if err != nil {
		return nil, err
	}
	return pageType.NewDictionary(int(c.index), numValues, values), nil
}
var (
	// Compile-time check that *Column implements the Node interface.
	_ Node = (*Column)(nil)
)
================================================
FILE: column_buffer.go
================================================
package parquet
import (
"bytes"
"fmt"
"io"
"sort"
"unsafe"
"github.com/segmentio/parquet-go/deprecated"
"github.com/segmentio/parquet-go/encoding/plain"
"github.com/segmentio/parquet-go/internal/bitpack"
"github.com/segmentio/parquet-go/internal/unsafecast"
"github.com/segmentio/parquet-go/sparse"
)
// ColumnBuffer is an interface representing columns of a row group.
//
// ColumnBuffer implements sort.Interface as a way to support reordering the
// rows that have been written to it.
//
// The current implementation has a limitation which prevents applications from
// providing custom versions of this interface because it contains unexported
// methods. The only way to create ColumnBuffer values is to call the
// NewColumnBuffer method of Type instances. This limitation may be lifted in
// future releases.
type ColumnBuffer interface {
	// Exposes a read-only view of the column buffer.
	ColumnChunk

	// The column implements ValueReaderAt as a mechanism to read values at
	// specific locations within the buffer.
	ValueReaderAt

	// The column implements ValueWriter as a mechanism to optimize the copy
	// of values into the buffer in contexts where the row information is
	// provided by the values because the repetition and definition levels
	// are set.
	ValueWriter

	// For indexed columns, returns the underlying dictionary holding the column
	// values. If the column is not indexed, nil is returned.
	Dictionary() Dictionary

	// Returns a copy of the column. The returned copy shares no memory with
	// the original, mutations of either column will not modify the other.
	Clone() ColumnBuffer

	// Returns the column as a Page.
	Page() Page

	// Clears all rows written to the column.
	Reset()

	// Returns the current capacity of the column (rows).
	Cap() int

	// Returns the number of rows currently written to the column.
	Len() int

	// Compares rows at index i and j and reports whether i < j.
	Less(i, j int) bool

	// Swaps rows at index i and j.
	Swap(i, j int)

	// Returns the size of the column buffer in bytes.
	Size() int64

	// This method is employed to write rows from arrays of Go values into the
	// column buffer. The method is currently unexported because it uses unsafe
	// APIs which would be difficult for applications to leverage, increasing
	// the risk of introducing bugs in the code. As a consequence, applications
	// cannot use custom implementations of the ColumnBuffer interface since
	// they cannot declare an unexported method that would match this signature.
	// It means that in order to create a ColumnBuffer value, programs need to
	// go through a call to NewColumnBuffer on a Type instance. We make this
	// trade off for now as it is preferable to optimize for safety over
	// extensibility in the public APIs, we might revisit in the future if we
	// learn about valid use cases for custom column buffer types.
	writeValues(rows sparse.Array, levels columnLevels)
}
// columnLevels bundles the repetition and definition level state threaded
// through writeValues calls down the column tree.
type columnLevels struct {
	repetitionDepth byte
	repetitionLevel byte
	definitionLevel byte
}
// columnIndexOfNullable wraps the column index of base so that null counts
// and null-page checks are derived from the definition levels of the
// enclosing optional or repeated column.
func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitionLevels []byte) ColumnIndex {
	index := &nullableColumnIndex{
		ColumnIndex:        base.ColumnIndex(),
		maxDefinitionLevel: maxDefinitionLevel,
		definitionLevels:   definitionLevels,
	}
	return index
}
// nullableColumnIndex wraps a ColumnIndex to derive null statistics from the
// definition levels of an optional or repeated column.
type nullableColumnIndex struct {
	ColumnIndex
	maxDefinitionLevel byte
	definitionLevels   []byte
}

// NullPage reports whether page i contains only null values.
func (index *nullableColumnIndex) NullPage(i int) bool {
	return index.NullCount(i) == int64(len(index.definitionLevels))
}

// NullCount returns the number of null values, counted as definition levels
// below the maximum.
//
// NOTE(review): the page index i is ignored; the count spans all definition
// levels held by the buffer — presumably fine because the buffer materializes
// as a single page, but confirm if multi-page use is ever added.
func (index *nullableColumnIndex) NullCount(i int) int64 {
	return int64(countLevelsNotEqual(index.definitionLevels, index.maxDefinitionLevel))
}
// nullOrdering compares rows i and j of column, where definitionLevel1 and
// definitionLevel2 are the definition levels of the two rows; a definition
// level below maxDefinitionLevel identifies a null value.
type nullOrdering func(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool
// nullsGoFirst is a nullOrdering which sorts null values before non-null
// values; two non-null values fall back to the column's own ordering.
func nullsGoFirst(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
	null1 := definitionLevel1 != maxDefinitionLevel
	null2 := definitionLevel2 != maxDefinitionLevel
	switch {
	case null1:
		return !null2
	case null2:
		return false
	default:
		return column.Less(i, j)
	}
}
// nullsGoLast is a nullOrdering which sorts null values after non-null
// values; two non-null values fall back to the column's own ordering.
func nullsGoLast(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
	if definitionLevel1 != maxDefinitionLevel {
		// A null value never sorts before anything.
		return false
	}
	return definitionLevel2 != maxDefinitionLevel || column.Less(i, j)
}
// reversedColumnBuffer is an adapter of ColumnBuffer which inverses the order
// in which rows are ordered when the column gets sorted.
//
// This type is used when buffers are constructed with sorting columns ordering
// values in descending order.
type reversedColumnBuffer struct{ ColumnBuffer }

// Less reports the inverse of the wrapped column buffer's ordering.
func (col *reversedColumnBuffer) Less(i, j int) bool { return col.ColumnBuffer.Less(j, i) }
// optionalColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// definition levels.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max definition level and a zero repetition level, which may be because the
// column or one of its parent(s) are marked optional.
type optionalColumnBuffer struct {
	base               ColumnBuffer // holds the non-null values only
	reordered          bool         // set when rows were swapped since the last Page()
	maxDefinitionLevel byte
	rows               []int32 // per-row index into base, or -1 for null rows
	sortIndex          []int32 // scratch space used by Page() to reorder base
	definitionLevels   []byte  // one definition level per row
	nullOrdering       nullOrdering
}
// newOptionalColumnBuffer wraps base in an optionalColumnBuffer which tracks
// definition levels for an optional column, sorting null values according to
// nullOrdering.
func newOptionalColumnBuffer(base ColumnBuffer, maxDefinitionLevel byte, nullOrdering nullOrdering) *optionalColumnBuffer {
	capacity := base.Cap()
	col := &optionalColumnBuffer{
		base:               base,
		maxDefinitionLevel: maxDefinitionLevel,
		rows:               make([]int32, 0, capacity),
		definitionLevels:   make([]byte, 0, capacity),
		nullOrdering:       nullOrdering,
	}
	return col
}
// Clone returns a deep copy of the column buffer; the copy shares no memory
// with the receiver.
func (col *optionalColumnBuffer) Clone() ColumnBuffer {
	clone := &optionalColumnBuffer{
		base:               col.base.Clone(),
		reordered:          col.reordered,
		maxDefinitionLevel: col.maxDefinitionLevel,
		nullOrdering:       col.nullOrdering,
	}
	clone.rows = append([]int32{}, col.rows...)
	clone.definitionLevels = append([]byte{}, col.definitionLevels...)
	return clone
}
// Type returns the type of the underlying column.
func (col *optionalColumnBuffer) Type() Type {
	return col.base.Type()
}

// NumValues returns the number of values written, including nulls.
func (col *optionalColumnBuffer) NumValues() int64 {
	return int64(len(col.definitionLevels))
}

// ColumnIndex exposes the base column index with null statistics derived
// from the definition levels.
func (col *optionalColumnBuffer) ColumnIndex() ColumnIndex {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

// OffsetIndex delegates to the underlying column.
func (col *optionalColumnBuffer) OffsetIndex() OffsetIndex {
	return col.base.OffsetIndex()
}

// BloomFilter delegates to the underlying column.
func (col *optionalColumnBuffer) BloomFilter() BloomFilter {
	return col.base.BloomFilter()
}

// Dictionary delegates to the underlying column.
func (col *optionalColumnBuffer) Dictionary() Dictionary {
	return col.base.Dictionary()
}

// Column delegates to the underlying column.
func (col *optionalColumnBuffer) Column() int {
	return col.base.Column()
}

// Pages returns the buffer materialized as a single page.
func (col *optionalColumnBuffer) Pages() Pages {
	return onePage(col.Page())
}
// Page materializes the buffer as a Page view. If rows were reordered by
// sorting, the base column is first rearranged in place with a cyclic sort
// driven by the row index mapping, then the row indexes are rewritten to be
// consecutive.
func (col *optionalColumnBuffer) Page() Page {
	// No need for any cyclic sorting if the rows have not been reordered.
	// This case is also important because the cyclic sorting modifies the
	// buffer which makes it unsafe to read the buffer concurrently.
	if col.reordered {
		numNulls := countLevelsNotEqual(col.definitionLevels, col.maxDefinitionLevel)
		numValues := len(col.rows) - numNulls

		if numValues > 0 {
			if cap(col.sortIndex) < numValues {
				col.sortIndex = make([]int32, numValues)
			}
			sortIndex := col.sortIndex[:numValues]

			// Invert the row mapping: sortIndex[baseIndex] = target position
			// of that base value in row order.
			i := 0
			for _, j := range col.rows {
				if j >= 0 {
					sortIndex[j] = int32(i)
					i++
				}
			}

			// Cyclic sort: O(N)
			for i := range sortIndex {
				for j := int(sortIndex[i]); i != j; j = int(sortIndex[i]) {
					col.base.Swap(i, j)
					sortIndex[i], sortIndex[j] = sortIndex[j], sortIndex[i]
				}
			}
		}

		// The base column is now in row order; rewrite non-null rows with
		// consecutive indexes.
		i := 0
		for _, r := range col.rows {
			if r >= 0 {
				col.rows[i] = int32(i)
				i++
			}
		}

		col.reordered = false
	}

	return newOptionalPage(col.base.Page(), col.maxDefinitionLevel, col.definitionLevels)
}
// Reset clears all rows written to the column, retaining allocated capacity.
func (col *optionalColumnBuffer) Reset() {
	col.base.Reset()
	col.rows = col.rows[:0]
	col.definitionLevels = col.definitionLevels[:0]
}

// Size returns the size of the column buffer in bytes, including the row
// and level bookkeeping slices plus the underlying column.
func (col *optionalColumnBuffer) Size() int64 {
	return int64(4*len(col.rows)+4*len(col.sortIndex)+len(col.definitionLevels)) + col.base.Size()
}

// Cap returns the row capacity of the buffer; Len the number of rows written.
func (col *optionalColumnBuffer) Cap() int { return cap(col.rows) }

func (col *optionalColumnBuffer) Len() int { return len(col.rows) }
// Less reports whether the row at index i sorts before the row at index j,
// delegating the null-vs-value decision to the configured null ordering.
func (col *optionalColumnBuffer) Less(i, j int) bool {
	definitionLevel1 := col.definitionLevels[i]
	definitionLevel2 := col.definitionLevels[j]
	return col.nullOrdering(
		col.base,
		int(col.rows[i]),
		int(col.rows[j]),
		col.maxDefinitionLevel,
		definitionLevel1,
		definitionLevel2,
	)
}
// Swap swaps the rows at indexes i and j.
//
// Because the underlying column does not contain null values, we cannot
// swap its values at indexes i and j. We swap the row indexes only, then
// reorder the underlying buffer using a cyclic sort when the buffer is
// materialized into a page view.
func (col *optionalColumnBuffer) Swap(i, j int) {
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
	col.definitionLevels[i], col.definitionLevels[j] = col.definitionLevels[j], col.definitionLevels[i]
}
// WriteValues appends values to the column buffer, recording null values via
// the definition levels and writing only non-null values to the base column.
func (col *optionalColumnBuffer) WriteValues(values []Value) (n int, err error) {
	rowIndex := int32(col.base.Len())

	for n < len(values) {
		// Collect index range of contiguous null values, from i to n. If this
		// for loop exhausts the values, all remaining if statements and for
		// loops will be no-ops and the loop will terminate.
		i := n
		for n < len(values) && values[n].definitionLevel != col.maxDefinitionLevel {
			n++
		}

		// Write the contiguous null values up until the first non-null value
		// obtained in the for loop above.
		for _, v := range values[i:n] {
			col.rows = append(col.rows, -1)
			col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
		}

		// Collect index range of contiguous non-null values, from i to n.
		i = n
		for n < len(values) && values[n].definitionLevel == col.maxDefinitionLevel {
			n++
		}

		// As long as i < n we have non-null values still to write. It is
		// possible that we just exhausted the input values in which case i == n
		// and the outer for loop will terminate.
		if i < n {
			count, err := col.base.WriteValues(values[i:n])
			col.definitionLevels = appendLevel(col.definitionLevels, col.maxDefinitionLevel, count)

			// Record one consecutive base row index per value written.
			for count > 0 {
				col.rows = append(col.rows, rowIndex)
				rowIndex++
				count--
			}

			if err != nil {
				return n, err
			}
		}
	}
	return n, nil
}
// writeValues appends the rows of the sparse array to the buffer, recording
// the definition level carried by levels for each row.
func (col *optionalColumnBuffer) writeValues(rows sparse.Array, levels columnLevels) {
	// The row count is zero when writing a null optional value, in which case
	// we still need to output a row to the buffer to record the definition
	// level.
	if rows.Len() == 0 {
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		col.rows = append(col.rows, -1)
		return
	}

	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, rows.Len())

	i := len(col.rows)
	j := len(col.rows) + rows.Len()

	// Grow the rows slice with doubled capacity to amortize reallocations.
	if j <= cap(col.rows) {
		col.rows = col.rows[:j]
	} else {
		tmp := make([]int32, j, 2*j)
		copy(tmp, col.rows)
		col.rows = tmp
	}

	// Null rows are marked with -1; non-null rows receive consecutive
	// indexes into the base column, which receives the values themselves.
	if levels.definitionLevel != col.maxDefinitionLevel {
		broadcastValueInt32(col.rows[i:], -1)
	} else {
		broadcastRangeInt32(col.rows[i:], int32(col.base.Len()))
		col.base.writeValues(rows, levels)
	}
}
// ReadValuesAt reads values starting at the given row offset, reconstructing
// null values from the definition levels since nulls are not stored in the
// base column.
func (col *optionalColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
	length := int64(len(col.definitionLevels))
	if offset < 0 {
		return 0, errRowIndexOutOfBounds(offset, length)
	}
	if offset >= length {
		return 0, io.EOF
	}
	if length -= offset; length < int64(len(values)) {
		values = values[:length]
	}

	// Count nulls before the offset (to translate the row offset to a base
	// column offset) and within the requested window.
	numNulls1 := int64(countLevelsNotEqual(col.definitionLevels[:offset], col.maxDefinitionLevel))
	numNulls2 := int64(countLevelsNotEqual(col.definitionLevels[offset:offset+length], col.maxDefinitionLevel))

	if numNulls2 < length {
		// Read the window's non-null values, packed at the front of values.
		n, err := col.base.ReadValuesAt(values[:length-numNulls2], offset-numNulls1)
		if err != nil {
			return n, err
		}
	}

	if numNulls2 > 0 {
		// Walk the window backwards, spreading the packed non-null values to
		// their final positions and synthesizing null values in between.
		//
		// i indexes the last packed non-null value. The previous code
		// initialized it to numNulls2-1, which pointed at the wrong element
		// (and could go negative, or fill nothing for an all-null window)
		// whenever the null and non-null counts differed.
		columnIndex := ^int16(col.Column())
		i := length - numNulls2 - 1
		j := length - 1
		definitionLevels := col.definitionLevels[offset : offset+length]
		maxDefinitionLevel := col.maxDefinitionLevel

		for n := len(definitionLevels) - 1; n >= 0 && j > i; n-- {
			if definitionLevels[n] != maxDefinitionLevel {
				values[j] = Value{definitionLevel: definitionLevels[n], columnIndex: columnIndex}
			} else {
				values[j] = values[i]
				i--
			}
			j--
		}
	}

	return int(length), nil
}
// repeatedColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// repetition levels, definition levels, and map rows to the region of the
// underlying buffer that contains their sequence of values.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max repetition level, which may be because the column or one of its parent(s)
// are marked repeated.
type repeatedColumnBuffer struct {
	base               ColumnBuffer // holds the non-null values only
	reordered          bool         // set when rows were swapped since the last Page()
	maxRepetitionLevel byte
	maxDefinitionLevel byte
	rows               []offsetMapping
	repetitionLevels   []byte
	definitionLevels   []byte
	buffer             []Value               // scratch values used by Page() when reordering rows
	reordering         *repeatedColumnBuffer // lazily allocated copy used to rewrite rows in sorted order
	nullOrdering       nullOrdering
}

// The offsetMapping type maps the logical offset of rows within the repetition
// and definition levels, to the base offsets in the underlying column buffers
// where the non-null values have been written.
type offsetMapping struct {
	offset     uint32 // offset of the row in the repetition/definition levels
	baseOffset uint32 // offset of the row's first non-null value in base
}
// newRepeatedColumnBuffer wraps base in a repeatedColumnBuffer which tracks
// repetition and definition levels for a repeated column, sorting null values
// according to nullOrdering.
func newRepeatedColumnBuffer(base ColumnBuffer, maxRepetitionLevel, maxDefinitionLevel byte, nullOrdering nullOrdering) *repeatedColumnBuffer {
	capacity := base.Cap()
	buf := &repeatedColumnBuffer{
		base:               base,
		maxRepetitionLevel: maxRepetitionLevel,
		maxDefinitionLevel: maxDefinitionLevel,
		// Rows hold multiple values each; size the row mapping at 1/8th of
		// the value capacity as a heuristic.
		rows:             make([]offsetMapping, 0, capacity/8),
		repetitionLevels: make([]byte, 0, capacity),
		definitionLevels: make([]byte, 0, capacity),
		nullOrdering:     nullOrdering,
	}
	return buf
}
// Clone returns a deep copy of the column buffer; the copy shares no memory
// with the receiver.
func (col *repeatedColumnBuffer) Clone() ColumnBuffer {
	clone := &repeatedColumnBuffer{
		base:               col.base.Clone(),
		reordered:          col.reordered,
		maxRepetitionLevel: col.maxRepetitionLevel,
		maxDefinitionLevel: col.maxDefinitionLevel,
		nullOrdering:       col.nullOrdering,
	}
	clone.rows = append([]offsetMapping{}, col.rows...)
	clone.repetitionLevels = append([]byte{}, col.repetitionLevels...)
	clone.definitionLevels = append([]byte{}, col.definitionLevels...)
	return clone
}
// Type returns the type of the underlying column.
func (col *repeatedColumnBuffer) Type() Type {
	return col.base.Type()
}

// NumValues returns the number of values written, including nulls.
func (col *repeatedColumnBuffer) NumValues() int64 {
	return int64(len(col.definitionLevels))
}

// ColumnIndex exposes the base column index with null statistics derived
// from the definition levels.
func (col *repeatedColumnBuffer) ColumnIndex() ColumnIndex {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

// OffsetIndex delegates to the underlying column.
func (col *repeatedColumnBuffer) OffsetIndex() OffsetIndex {
	return col.base.OffsetIndex()
}

// BloomFilter delegates to the underlying column.
func (col *repeatedColumnBuffer) BloomFilter() BloomFilter {
	return col.base.BloomFilter()
}

// Dictionary delegates to the underlying column.
func (col *repeatedColumnBuffer) Dictionary() Dictionary {
	return col.base.Dictionary()
}

// Column delegates to the underlying column.
func (col *repeatedColumnBuffer) Column() int {
	return col.base.Column()
}

// Pages returns the buffer materialized as a single page.
func (col *repeatedColumnBuffer) Pages() Pages {
	return onePage(col.Page())
}
// Page materializes the buffer as a Page view. If rows were reordered by
// sorting, the values and levels are rewritten row by row into a secondary
// buffer following the new row order, then the two buffers are swapped.
func (col *repeatedColumnBuffer) Page() Page {
	if col.reordered {
		if col.reordering == nil {
			col.reordering = col.Clone().(*repeatedColumnBuffer)
		}

		column := col.reordering
		column.Reset()
		maxNumValues := 0
		defer func() {
			clearValues(col.buffer[:maxNumValues])
		}()

		baseOffset := 0

		for _, row := range col.rows {
			rowOffset := int(row.offset)
			rowLength := repeatedRowLength(col.repetitionLevels[rowOffset:])
			numNulls := countLevelsNotEqual(col.definitionLevels[rowOffset:rowOffset+rowLength], col.maxDefinitionLevel)
			numValues := rowLength - numNulls

			if numValues > 0 {
				// Copy the row's non-null values from the old base column to
				// the reordering buffer through the scratch value slice.
				if numValues > cap(col.buffer) {
					col.buffer = make([]Value, numValues)
				} else {
					col.buffer = col.buffer[:numValues]
				}
				n, err := col.base.ReadValuesAt(col.buffer, int64(row.baseOffset))
				if err != nil && n < numValues {
					return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err)
				}
				if _, err := column.base.WriteValues(col.buffer); err != nil {
					return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err)
				}
				if numValues > maxNumValues {
					maxNumValues = numValues
				}
			}

			column.rows = append(column.rows, offsetMapping{
				offset:     uint32(len(column.repetitionLevels)),
				baseOffset: uint32(baseOffset),
			})
			column.repetitionLevels = append(column.repetitionLevels, col.repetitionLevels[rowOffset:rowOffset+rowLength]...)
			column.definitionLevels = append(column.definitionLevels, col.definitionLevels[rowOffset:rowOffset+rowLength]...)
			baseOffset += numValues
		}

		col.swapReorderingBuffer(column)
		col.reordered = false
	}

	return newRepeatedPage(
		col.base.Page(),
		col.maxRepetitionLevel,
		col.maxDefinitionLevel,
		col.repetitionLevels,
		col.definitionLevels,
	)
}
// swapReorderingBuffer exchanges the state of col with buf, which holds the
// reordered copy produced by Page. After the swap, col owns the reordered
// data while buf retains the previous buffers for reuse on the next
// reordering pass.
func (col *repeatedColumnBuffer) swapReorderingBuffer(buf *repeatedColumnBuffer) {
	col.base, buf.base = buf.base, col.base
	col.rows, buf.rows = buf.rows, col.rows
	col.repetitionLevels, buf.repetitionLevels = buf.repetitionLevels, col.repetitionLevels
	col.definitionLevels, buf.definitionLevels = buf.definitionLevels, col.definitionLevels
}
// Reset clears the column buffer so it can be reused; the row index and
// level slices are truncated in place to retain their capacity.
func (col *repeatedColumnBuffer) Reset() {
	col.base.Reset()
	col.rows = col.rows[:0]
	col.repetitionLevels = col.repetitionLevels[:0]
	col.definitionLevels = col.definitionLevels[:0]
}
// Size returns the memory footprint of the column buffer in bytes,
// accounting for the row index (8 bytes per entry), the repetition and
// definition levels (one byte per level), and the underlying base buffer.
func (col *repeatedColumnBuffer) Size() int64 {
	rowIndexSize := 8 * len(col.rows)
	levelsSize := len(col.repetitionLevels) + len(col.definitionLevels)
	return int64(rowIndexSize+levelsSize) + col.base.Size()
}
// Cap returns the number of rows the column buffer can hold without growing.
func (col *repeatedColumnBuffer) Cap() int { return cap(col.rows) }
// Len returns the number of rows currently held in the column buffer.
func (col *repeatedColumnBuffer) Len() int { return len(col.rows) }
// Less compares rows i and j element by element using the column's null
// ordering, yielding a lexicographic order: the first differing element
// decides, and when one row is a prefix of the other the shorter row comes
// first.
func (col *repeatedColumnBuffer) Less(i, j int) bool {
	row1 := col.rows[i]
	row2 := col.rows[j]
	less := col.nullOrdering
	row1Length := repeatedRowLength(col.repetitionLevels[row1.offset:])
	row2Length := repeatedRowLength(col.repetitionLevels[row2.offset:])

	// x and y are the positions in the base column buffer of the next
	// non-null element of each row; the base buffer stores non-null values
	// only, so these indexes advance just past non-null elements.
	x := int(row1.baseOffset)
	y := int(row2.baseOffset)

	for k := 0; k < row1Length && k < row2Length; k++ {
		definitionLevel1 := col.definitionLevels[int(row1.offset)+k]
		definitionLevel2 := col.definitionLevels[int(row2.offset)+k]
		switch {
		case less(col.base, x, y, col.maxDefinitionLevel, definitionLevel1, definitionLevel2):
			return true
		case less(col.base, y, x, col.maxDefinitionLevel, definitionLevel2, definitionLevel1):
			return false
		}
		// Only non-null elements occupy a slot in the base column buffer,
		// so the base indexes move forward only when the definition level
		// marks the element as non-null.
		if definitionLevel1 == col.maxDefinitionLevel {
			x++
		}
		if definitionLevel2 == col.maxDefinitionLevel {
			y++
		}
	}

	// The shared prefix compared equal; order by row length.
	return row1Length < row2Length
}
// Swap exchanges rows i and j of the column buffer.
//
// Because the underlying column does not contain null values, and may hold
// an arbitrary number of values per row, we cannot swap its values at
// indexes i and j. We swap the row indexes only, then reorder the base
// column buffer when its view is materialized into a page by creating a
// copy and writing rows back to it following the order of rows in the
// repeated column buffer.
func (col *repeatedColumnBuffer) Swap(i, j int) {
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
}
// WriteValues appends the given values to the column buffer, splitting the
// input into rows at each value whose repetition level is zero, and writing
// each row individually via writeRow. It returns the number of values
// consumed, which may be less than len(values) if writing a row fails.
func (col *repeatedColumnBuffer) WriteValues(values []Value) (numValues int, err error) {
	maxRowLen := 0
	defer func() {
		// writeRow stages each row in col.buffer; clear up to the longest
		// row written so retained references can be reclaimed.
		clearValues(col.buffer[:maxRowLen])
	}()
	begin := 0
	for begin < len(values) {
		// A repetition level of zero marks the start of a new row; scan
		// ahead to find where the current row ends.
		end := begin
		if values[end].repetitionLevel == 0 {
			end++
		}
		for end < len(values) && values[end].repetitionLevel != 0 {
			end++
		}
		if err := col.writeRow(values[begin:end]); err != nil {
			return numValues, err
		}
		if rowLen := len(col.buffer); rowLen > maxRowLen {
			maxRowLen = rowLen
		}
		numValues += end - begin
		begin = end
	}
	return numValues, nil
}
func (col
gitextract_ou8jci6u/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── test.yml ├── .gitignore ├── .mailmap ├── .words ├── AUTHORS.txt ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── allocator.go ├── array.go ├── array_go18.go ├── bitmap.go ├── bloom/ │ ├── block.go │ ├── block_amd64.go │ ├── block_amd64.s │ ├── block_default.go │ ├── block_optimized.go │ ├── block_test.go │ ├── bloom.go │ ├── bloom_test.go │ ├── filter.go │ ├── filter_amd64.go │ ├── filter_amd64.s │ ├── filter_default.go │ ├── filter_test.go │ ├── hash.go │ └── xxhash/ │ ├── LICENSE │ ├── sum64uint.go │ ├── sum64uint_amd64.go │ ├── sum64uint_amd64.s │ ├── sum64uint_purego.go │ ├── sum64uint_test.go │ ├── xxhash.go │ ├── xxhash_amd64.go │ ├── xxhash_amd64.s │ ├── xxhash_purego.go │ └── xxhash_test.go ├── bloom.go ├── bloom_test.go ├── buffer.go ├── buffer_go18.go ├── buffer_go18_test.go ├── buffer_internal_test.go ├── buffer_pool.go ├── buffer_pool_test.go ├── buffer_test.go ├── column.go ├── column_buffer.go ├── column_buffer_amd64.go ├── column_buffer_amd64.s ├── column_buffer_go18.go ├── column_buffer_purego.go ├── column_buffer_test.go ├── column_chunk.go ├── column_index.go ├── column_index_internal_test.go ├── column_index_test.go ├── column_mapping.go ├── column_mapping_test.go ├── column_path.go ├── column_test.go ├── compare.go ├── compare_test.go ├── compress/ │ ├── brotli/ │ │ └── brotli.go │ ├── compress.go │ ├── compress_test.go │ ├── gzip/ │ │ └── gzip.go │ ├── lz4/ │ │ └── lz4.go │ ├── snappy/ │ │ └── snappy.go │ ├── uncompressed/ │ │ └── uncompressed.go │ └── zstd/ │ └── zstd.go ├── compress.go ├── config.go ├── convert.go ├── convert_test.go ├── dedupe.go ├── dedupe_test.go ├── deprecated/ │ ├── int96.go │ ├── int96_test.go │ └── parquet.go ├── dictionary.go ├── dictionary_amd64.go ├── dictionary_amd64.s ├── dictionary_purego.go ├── dictionary_test.go ├── encoding/ │ ├── bitpacked/ │ │ ├── bitpacked.go │ │ └── bitpacked_test.go │ ├── 
bytestreamsplit/ │ │ ├── bytestreamsplit.go │ │ ├── bytestreamsplit_amd64.go │ │ ├── bytestreamsplit_amd64.s │ │ ├── bytestreamsplit_purego.go │ │ └── bytestreamsplit_test.go │ ├── delta/ │ │ ├── binary_packed.go │ │ ├── binary_packed_amd64.go │ │ ├── binary_packed_amd64.s │ │ ├── binary_packed_amd64_test.go │ │ ├── binary_packed_purego.go │ │ ├── binary_packed_test.go │ │ ├── byte_array.go │ │ ├── byte_array_amd64.go │ │ ├── byte_array_amd64.s │ │ ├── byte_array_purego.go │ │ ├── byte_array_test.go │ │ ├── delta.go │ │ ├── delta_amd64.go │ │ ├── delta_amd64.s │ │ ├── delta_test.go │ │ ├── length_byte_array.go │ │ ├── length_byte_array_amd64.go │ │ ├── length_byte_array_amd64.s │ │ ├── length_byte_array_purego.go │ │ ├── length_byte_array_test.go │ │ └── testdata/ │ │ └── fuzz/ │ │ └── FuzzDeltaByteArray/ │ │ ├── 2404234dd7e87c04303eb7e58208d5b2ccb04fb616c18f3254e2375c4bc327e3 │ │ ├── 4cf9c92e5a2096e3d6c42eaf9b1e31d2567854d33e06c8d2d7a8c46437345850 │ │ ├── 9b210529f5e34e2dea5824929bf0d8242dc9c3165c0dce10bb376c50e21b38cc │ │ └── fbe137144bcda3a149c8ea109703f3242192c5480ea1e82dde0ea24e94f3afef │ ├── encoding.go │ ├── encoding_test.go │ ├── fuzz/ │ │ └── fuzz.go │ ├── notsupported.go │ ├── plain/ │ │ ├── dictionary.go │ │ ├── plain.go │ │ └── plain_test.go │ ├── rle/ │ │ ├── dictionary.go │ │ ├── rle.go │ │ ├── rle_amd64.go │ │ ├── rle_amd64.s │ │ ├── rle_amd64_test.go │ │ ├── rle_purego.go │ │ ├── rle_test.go │ │ └── testdata/ │ │ └── fuzz/ │ │ ├── FuzzEncodeBoolean/ │ │ │ ├── 6be5e340694798c2e5b94c758f0262edd2edf8af5795d4c6c60f6e02643bbb96 │ │ │ └── 9772b3f21a6f61810fe38d120bcc9da6d78540f22dc819a4201283608671fdf4 │ │ ├── FuzzEncodeInt32/ │ │ │ └── 06ba4bdb19de593e669c642987e270fe2488d4d58ecd712db136a3e011071253 │ │ └── FuzzEncodeLevels/ │ │ └── 0468684de48f926219bfc47be13ddf085b5a0ed9fbd9c40a005641b253e88d33 │ ├── test/ │ │ ├── test_go17.go │ │ └── test_go18.go │ ├── values.go │ └── values_test.go ├── encoding.go ├── errors.go ├── example_test.go ├── file.go ├── 
file_test.go ├── filter.go ├── filter_test.go ├── format/ │ ├── parquet.go │ └── parquet_test.go ├── go.mod ├── go.sum ├── hashprobe/ │ ├── aeshash/ │ │ ├── aeshash.go │ │ ├── aeshash_amd64.go │ │ ├── aeshash_amd64.s │ │ ├── aeshash_purego.go │ │ └── aeshash_test.go │ ├── hashprobe.go │ ├── hashprobe_amd64.go │ ├── hashprobe_amd64.s │ ├── hashprobe_purego.go │ ├── hashprobe_test.go │ └── wyhash/ │ ├── wyhash.go │ ├── wyhash_amd64.go │ ├── wyhash_amd64.s │ ├── wyhash_purego.go │ └── wyhash_test.go ├── internal/ │ ├── bitpack/ │ │ ├── bitpack.go │ │ ├── masks_int32_amd64.s │ │ ├── pack.go │ │ ├── unpack.go │ │ ├── unpack_int32_amd64.go │ │ ├── unpack_int32_amd64.s │ │ ├── unpack_int32_purego.go │ │ ├── unpack_int64_amd64.go │ │ ├── unpack_int64_amd64.s │ │ ├── unpack_int64_purego.go │ │ └── unpack_test.go │ ├── bytealg/ │ │ ├── broadcast_amd64.go │ │ ├── broadcast_amd64.s │ │ ├── broadcast_purego.go │ │ ├── broadcast_test.go │ │ ├── bytealg.go │ │ ├── bytealg_amd64.go │ │ ├── bytealg_test.go │ │ ├── count_amd64.go │ │ ├── count_amd64.s │ │ ├── count_purego.go │ │ └── count_test.go │ ├── debug/ │ │ ├── debug.go │ │ ├── finalizer_off.go │ │ └── finalizer_on.go │ ├── quick/ │ │ └── quick.go │ └── unsafecast/ │ ├── unsafecast_go17.go │ ├── unsafecast_go18.go │ └── unsafecast_go18_test.go ├── level.go ├── limits.go ├── merge.go ├── merge_test.go ├── multi_row_group.go ├── node.go ├── null.go ├── null_amd64.go ├── null_amd64.s ├── null_purego.go ├── null_test.go ├── offset_index.go ├── order.go ├── order_amd64.go ├── order_amd64.s ├── order_purego.go ├── order_test.go ├── page.go ├── page_bounds.go ├── page_bounds_amd64.go ├── page_bounds_amd64.s ├── page_bounds_purego.go ├── page_bounds_test.go ├── page_header.go ├── page_max.go ├── page_max_amd64.go ├── page_max_amd64.s ├── page_max_purego.go ├── page_max_test.go ├── page_min.go ├── page_min_amd64.go ├── page_min_amd64.s ├── page_min_purego.go ├── page_min_test.go ├── page_test.go ├── page_values.go ├── parquet.go ├── 
parquet_amd64.go ├── parquet_go18.go ├── parquet_go18_test.go ├── parquet_test.go ├── print.go ├── print_test.go ├── reader.go ├── reader_go18.go ├── reader_go18_test.go ├── reader_test.go ├── row.go ├── row_buffer.go ├── row_buffer_test.go ├── row_builder.go ├── row_builder_test.go ├── row_group.go ├── row_group_test.go ├── row_test.go ├── scan.go ├── scan_test.go ├── schema.go ├── schema_test.go ├── search.go ├── search_test.go ├── sorting.go ├── sorting_test.go ├── sparse/ │ ├── array.go │ ├── gather.go │ ├── gather_amd64.go │ ├── gather_amd64.s │ ├── gather_purego.go │ ├── gather_test.go │ └── sparse.go ├── testdata/ │ ├── alltypes_dictionary.parquet │ ├── alltypes_plain.parquet │ ├── alltypes_plain.snappy.parquet │ ├── alltypes_tiny_pages.parquet │ ├── alltypes_tiny_pages_plain.parquet │ ├── binary.parquet │ ├── byte_array_decimal.parquet │ ├── cluster_test_table_1.snappy.parquet │ ├── cluster_test_table_2.snappy.parquet │ ├── covid.snappy.parquet │ ├── data_index_bloom_encoding_stats.parquet │ ├── datapage_v2.snappy.parquet │ ├── delta_binary_packed.parquet │ ├── delta_byte_array.parquet │ ├── delta_encoding_optional_column.parquet │ ├── delta_encoding_required_column.parquet │ ├── delta_length_byte_array.parquet │ ├── dict-page-offset-zero.parquet │ ├── dms_test_table_LOAD00000001.parquet │ ├── empty.parquet │ ├── file.parquet │ ├── fixed_length_decimal.parquet │ ├── fixed_length_decimal_legacy.parquet │ ├── int32_decimal.parquet │ ├── int64_decimal.parquet │ ├── issue368.parquet │ ├── list_columns.parquet │ ├── lz4_raw_compressed.parquet │ ├── lz4_raw_compressed_larger.parquet │ ├── nested_lists.snappy.parquet │ ├── nested_maps.snappy.parquet │ ├── nested_structs.rust.parquet │ ├── nonnullable.impala.parquet │ ├── null_list.parquet │ ├── nullable.impala.parquet │ ├── nulls.snappy.parquet │ ├── repeated_no_annotation.parquet │ ├── rle_boolean_encoding.parquet │ ├── single_nan.parquet │ ├── small.parquet │ └── trace.snappy.parquet ├── transform.go ├── 
transform_test.go ├── type.go ├── value.go ├── value_amd64.go ├── value_amd64.s ├── value_go17.go ├── value_go18.go ├── value_test.go ├── values_purego.go ├── writer.go ├── writer_go18.go ├── writer_go18_test.go └── writer_test.go
Showing preview only (457K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (4932 symbols across 226 files)
FILE: allocator.go
type allocator (line 5) | type allocator struct
method makeBytes (line 7) | func (a *allocator) makeBytes(n int) []byte {
method copyBytes (line 25) | func (a *allocator) copyBytes(v []byte) []byte {
method copyString (line 31) | func (a *allocator) copyString(v string) string {
method reset (line 37) | func (a *allocator) reset() {
type rowAllocator (line 51) | type rowAllocator struct
method capture (line 53) | func (a *rowAllocator) capture(row Row) {
FILE: array.go
function makeArrayValue (line 9) | func makeArrayValue(values []Value, offset uintptr) sparse.Array {
function makeArrayString (line 14) | func makeArrayString(values []string) sparse.Array {
function makeArrayBE128 (line 20) | func makeArrayBE128(values []*[16]byte) sparse.Array {
FILE: array_go18.go
function makeArray (line 12) | func makeArray(base unsafe.Pointer, length int, offset uintptr) sparse.A...
function makeArrayOf (line 16) | func makeArrayOf[T any](s []T) sparse.Array {
function makeSlice (line 21) | func makeSlice[T any](a sparse.Array) []T {
function slice (line 25) | func slice[T any](p unsafe.Pointer, n int) []T {
type sliceHeader (line 29) | type sliceHeader struct
FILE: bitmap.go
type bitmap (line 5) | type bitmap struct
method reset (line 9) | func (m *bitmap) reset(size int) {
method clear (line 19) | func (m *bitmap) clear() {
function acquireBitmap (line 29) | func acquireBitmap(n int) *bitmap {
function releaseBitmap (line 39) | func releaseBitmap(b *bitmap) {
FILE: bloom.go
type BloomFilter (line 16) | type BloomFilter interface
type bloomFilter (line 34) | type bloomFilter struct
method Check (line 40) | func (f *bloomFilter) Check(v Value) (bool, error) {
method hash (line 44) | func (v Value) hash(h bloom.Hash) uint64 {
function newBloomFilter (line 57) | func newBloomFilter(file io.ReaderAt, offset int64, header *format.Bloom...
type BloomFilterColumn (line 74) | type BloomFilterColumn interface
function SplitBlockFilter (line 99) | func SplitBlockFilter(bitsPerValue uint, path ...string) BloomFilterColu...
type splitBlockFilter (line 106) | type splitBlockFilter struct
method Path (line 111) | func (f splitBlockFilter) Path() []string { return f.path }
method Hash (line 112) | func (f splitBlockFilter) Hash() bloom.Hash { return bloom....
method Encoding (line 113) | func (f splitBlockFilter) Encoding() encoding.Encoding { return splitB...
method Size (line 115) | func (f splitBlockFilter) Size(numValues int64) int {
function bloomFilterHeader (line 124) | func bloomFilterHeader(filter BloomFilterColumn) (header format.BloomFil...
function searchBloomFilterColumn (line 137) | func searchBloomFilterColumn(filters []BloomFilterColumn, path columnPat...
constant filterEncodeBufferSize (line 152) | filterEncodeBufferSize = 128
type splitBlockEncoding (line 155) | type splitBlockEncoding struct
method EncodeBoolean (line 159) | func (splitBlockEncoding) EncodeBoolean(dst []byte, src []byte) ([]byt...
method EncodeInt32 (line 164) | func (splitBlockEncoding) EncodeInt32(dst []byte, src []int32) ([]byte...
method EncodeInt64 (line 169) | func (splitBlockEncoding) EncodeInt64(dst []byte, src []int64) ([]byte...
method EncodeInt96 (line 174) | func (e splitBlockEncoding) EncodeInt96(dst []byte, src []deprecated.I...
method EncodeFloat (line 179) | func (splitBlockEncoding) EncodeFloat(dst []byte, src []float32) ([]by...
method EncodeDouble (line 184) | func (splitBlockEncoding) EncodeDouble(dst []byte, src []float64) ([]b...
method EncodeByteArray (line 189) | func (splitBlockEncoding) EncodeByteArray(dst []byte, src []byte, offs...
method EncodeFixedLenByteArray (line 210) | func (splitBlockEncoding) EncodeFixedLenByteArray(dst []byte, src []by...
function splitBlockEncodeFixedLenByteArray (line 220) | func splitBlockEncodeFixedLenByteArray(filter bloom.SplitBlockFilter, da...
function splitBlockEncodeUint8 (line 236) | func splitBlockEncodeUint8(filter bloom.SplitBlockFilter, values []uint8) {
function splitBlockEncodeUint32 (line 246) | func splitBlockEncodeUint32(filter bloom.SplitBlockFilter, values []uint...
function splitBlockEncodeUint64 (line 256) | func splitBlockEncodeUint64(filter bloom.SplitBlockFilter, values []uint...
function splitBlockEncodeUint128 (line 266) | func splitBlockEncodeUint128(filter bloom.SplitBlockFilter, values [][16...
FILE: bloom/block.go
type Word (line 6) | type Word
type Block (line 9) | type Block
method Bytes (line 12) | func (b *Block) Bytes() []byte {
constant BlockSize (line 18) | BlockSize = 32
constant salt0 (line 20) | salt0 = 0x47b6137b
constant salt1 (line 21) | salt1 = 0x44974d91
constant salt2 (line 22) | salt2 = 0x8824ad5b
constant salt3 (line 23) | salt3 = 0xa2b7289d
constant salt4 (line 24) | salt4 = 0x705495c7
constant salt5 (line 25) | salt5 = 0x2df1424b
constant salt6 (line 26) | salt6 = 0x9efc4947
constant salt7 (line 27) | salt7 = 0x5c6bfb31
FILE: bloom/block_amd64.go
function blockInsert (line 32) | func blockInsert(b *Block, x uint32)
function blockCheck (line 35) | func blockCheck(b *Block, x uint32) bool
method Insert (line 37) | func (b *Block) Insert(x uint32) { blockInsert(b, x) }
method Check (line 39) | func (b *Block) Check(x uint32) bool { return blockCheck(b, x) }
FILE: bloom/block_default.go
method set (line 25) | func (w *Word) set(i uint) {
method has (line 29) | func (w Word) has(i uint) bool {
function mask (line 33) | func mask(x uint32) Block {
method Insert (line 42) | func (b *Block) Insert(x uint32) {
method Check (line 53) | func (b *Block) Check(x uint32) bool {
FILE: bloom/block_optimized.go
method Insert (line 27) | func (b *Block) Insert(x uint32) {
method Check (line 38) | func (b *Block) Check(x uint32) bool {
method insertBulk (line 49) | func (f SplitBlockFilter) insertBulk(x []uint64) {
FILE: bloom/block_test.go
function TestBlock (line 10) | func TestBlock(t *testing.T) {
function BenchmarkBlockInsert (line 30) | func BenchmarkBlockInsert(b *testing.B) {
function BenchmarkBlockCheck (line 38) | func BenchmarkBlockCheck(b *testing.B) {
FILE: bloom/bloom.go
function fasthash1x64 (line 4) | func fasthash1x64(value uint64, scale int32) uint64 {
function fasthash4x64 (line 8) | func fasthash4x64(dst, src *[4]uint64, scale int32) {
FILE: bloom/bloom_test.go
function TestFasthash (line 11) | func TestFasthash(t *testing.T) {
function BenchmarkFasthash (line 26) | func BenchmarkFasthash(b *testing.B) {
FILE: bloom/filter.go
type Filter (line 11) | type Filter interface
type SplitBlockFilter (line 19) | type SplitBlockFilter
method Reset (line 43) | func (f SplitBlockFilter) Reset() {
method Block (line 51) | func (f SplitBlockFilter) Block(x uint64) *Block { return &f[fasthash1...
method InsertBulk (line 54) | func (f SplitBlockFilter) InsertBulk(x []uint64) { filterInsertBulk(f,...
method Insert (line 57) | func (f SplitBlockFilter) Insert(x uint64) { filterInsert(f, x) }
method Check (line 60) | func (f SplitBlockFilter) Check(x uint64) bool { return filterCheck(f,...
method Bytes (line 66) | func (f SplitBlockFilter) Bytes() []byte {
function MakeSplitBlockFilter (line 23) | func MakeSplitBlockFilter(data []byte) SplitBlockFilter {
function NumSplitBlocksOf (line 36) | func NumSplitBlocksOf(numValues int64, bitsPerValue uint) int {
function CheckSplitBlock (line 74) | func CheckSplitBlock(r io.ReaderAt, n int64, x uint64) (bool, error) {
function acquireBlock (line 86) | func acquireBlock() *Block {
function releaseBlock (line 94) | func releaseBlock(b *Block) {
FILE: bloom/filter_amd64.go
function filterInsertBulk (line 27) | func filterInsertBulk(f []Block, x []uint64)
function filterInsert (line 30) | func filterInsert(f []Block, x uint64)
function filterCheck (line 33) | func filterCheck(f []Block, x uint64) bool
FILE: bloom/filter_default.go
function filterInsertBulk (line 5) | func filterInsertBulk(f []Block, x []uint64) {
function filterInsert (line 11) | func filterInsert(f []Block, x uint64) {
function filterCheck (line 15) | func filterCheck(f []Block, x uint64) bool {
FILE: bloom/filter_test.go
function TestSplitBlockFilter (line 11) | func TestSplitBlockFilter(t *testing.T) {
function TestSplitBlockFilterBug1 (line 80) | func TestSplitBlockFilterBug1(t *testing.T) {
type serializedFilter (line 96) | type serializedFilter struct
method Check (line 100) | func (f *serializedFilter) Check(x uint64) bool {
function newSerializedFilter (line 105) | func newSerializedFilter(b []byte) *serializedFilter {
function BenchmarkFilterInsertBulk (line 111) | func BenchmarkFilterInsertBulk(b *testing.B) {
function BenchmarkFilterInsert (line 127) | func BenchmarkFilterInsert(b *testing.B) {
function BenchmarkFilterCheck (line 135) | func BenchmarkFilterCheck(b *testing.B) {
FILE: bloom/hash.go
type Hash (line 8) | type Hash interface
type XXH64 (line 29) | type XXH64 struct
method Sum64 (line 31) | func (XXH64) Sum64(b []byte) uint64 {
method Sum64Uint8 (line 35) | func (XXH64) Sum64Uint8(v uint8) uint64 {
method Sum64Uint16 (line 39) | func (XXH64) Sum64Uint16(v uint16) uint64 {
method Sum64Uint32 (line 43) | func (XXH64) Sum64Uint32(v uint32) uint64 {
method Sum64Uint64 (line 47) | func (XXH64) Sum64Uint64(v uint64) uint64 {
method Sum64Uint128 (line 51) | func (XXH64) Sum64Uint128(v [16]byte) uint64 {
method MultiSum64Uint8 (line 55) | func (XXH64) MultiSum64Uint8(h []uint64, v []uint8) int {
method MultiSum64Uint16 (line 59) | func (XXH64) MultiSum64Uint16(h []uint64, v []uint16) int {
method MultiSum64Uint32 (line 63) | func (XXH64) MultiSum64Uint32(h []uint64, v []uint32) int {
method MultiSum64Uint64 (line 67) | func (XXH64) MultiSum64Uint64(h []uint64, v []uint64) int {
method MultiSum64Uint128 (line 71) | func (XXH64) MultiSum64Uint128(h []uint64, v [][16]byte) int {
FILE: bloom/xxhash/sum64uint.go
function Sum64Uint8 (line 3) | func Sum64Uint8(v uint8) uint64 {
function Sum64Uint16 (line 9) | func Sum64Uint16(v uint16) uint64 {
function Sum64Uint32 (line 18) | func Sum64Uint32(v uint32) uint64 {
function Sum64Uint64 (line 24) | func Sum64Uint64(v uint64) uint64 {
function Sum64Uint128 (line 30) | func Sum64Uint128(v [16]byte) uint64 {
FILE: bloom/xxhash/sum64uint_amd64.go
function MultiSum64Uint8 (line 37) | func MultiSum64Uint8(h []uint64, v []uint8) int
function MultiSum64Uint16 (line 40) | func MultiSum64Uint16(h []uint64, v []uint16) int
function MultiSum64Uint32 (line 43) | func MultiSum64Uint32(h []uint64, v []uint32) int
function MultiSum64Uint64 (line 46) | func MultiSum64Uint64(h []uint64, v []uint64) int
function MultiSum64Uint128 (line 49) | func MultiSum64Uint128(h []uint64, v [][16]byte) int
FILE: bloom/xxhash/sum64uint_purego.go
function MultiSum64Uint8 (line 5) | func MultiSum64Uint8(h []uint64, v []uint8) int {
function MultiSum64Uint16 (line 15) | func MultiSum64Uint16(h []uint64, v []uint16) int {
function MultiSum64Uint32 (line 25) | func MultiSum64Uint32(h []uint64, v []uint32) int {
function MultiSum64Uint64 (line 35) | func MultiSum64Uint64(h []uint64, v []uint64) int {
function MultiSum64Uint128 (line 45) | func MultiSum64Uint128(h []uint64, v [][16]byte) int {
function min (line 55) | func min(a, b int) int {
FILE: bloom/xxhash/sum64uint_test.go
function TestSumUint8 (line 13) | func TestSumUint8(t *testing.T) {
function TestSumUint16 (line 22) | func TestSumUint16(t *testing.T) {
function TestSumUint32 (line 31) | func TestSumUint32(t *testing.T) {
function TestSumUint64 (line 40) | func TestSumUint64(t *testing.T) {
function TestSumUint128 (line 49) | func TestSumUint128(t *testing.T) {
function TestMultiSum64Uint8 (line 58) | func TestMultiSum64Uint8(t *testing.T) {
function TestMultiSum64Uint16 (line 80) | func TestMultiSum64Uint16(t *testing.T) {
function TestMultiSum64Uint32 (line 104) | func TestMultiSum64Uint32(t *testing.T) {
function TestMultiSum64Uint64 (line 128) | func TestMultiSum64Uint64(t *testing.T) {
function TestMultiSum64Uint128 (line 152) | func TestMultiSum64Uint128(t *testing.T) {
function reportThroughput (line 174) | func reportThroughput(b *testing.B, loops, count int, start time.Time) {
constant benchmarkBufferSize (line 184) | benchmarkBufferSize = 4096
function BenchmarkMultiSum64Uint8 (line 186) | func BenchmarkMultiSum64Uint8(b *testing.B) {
function BenchmarkMultiSum64Uint16 (line 201) | func BenchmarkMultiSum64Uint16(b *testing.B) {
function BenchmarkMultiSum64Uint32 (line 216) | func BenchmarkMultiSum64Uint32(b *testing.B) {
function BenchmarkMultiSum64Uint64 (line 231) | func BenchmarkMultiSum64Uint64(b *testing.B) {
function BenchmarkMultiSum64Uint128 (line 246) | func BenchmarkMultiSum64Uint128(b *testing.B) {
FILE: bloom/xxhash/xxhash.go
constant prime1 (line 11) | prime1 uint64 = 0x9E3779B185EBCA87
constant prime2 (line 12) | prime2 uint64 = 0xC2B2AE3D27D4EB4F
constant prime3 (line 13) | prime3 uint64 = 0x165667B19E3779F9
constant prime4 (line 14) | prime4 uint64 = 0x85EBCA77C2B2AE63
constant prime5 (line 15) | prime5 uint64 = 0x27D4EB2F165667C5
constant prime1plus2 (line 18) | prime1plus2 uint64 = 0x60EA27EEADC0B5D6
constant negprime1 (line 19) | negprime1 uint64 = 0x61C8864E7A143579
function avalanche (line 22) | func avalanche(h uint64) uint64 {
function round (line 31) | func round(acc, input uint64) uint64 {
function mergeRound (line 38) | func mergeRound(acc, val uint64) uint64 {
function u64 (line 45) | func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
function u32 (line 46) | func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
function rol1 (line 48) | func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
function rol7 (line 49) | func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
function rol11 (line 50) | func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
function rol12 (line 51) | func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
function rol18 (line 52) | func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
function rol23 (line 53) | func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
function rol27 (line 54) | func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
function rol31 (line 55) | func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }
FILE: bloom/xxhash/xxhash_amd64.go
function Sum64 (line 6) | func Sum64(b []byte) uint64
FILE: bloom/xxhash/xxhash_purego.go
function Sum64 (line 6) | func Sum64(b []byte) uint64 {
FILE: bloom/xxhash/xxhash_test.go
function TestSum64 (line 9) | func TestSum64(t *testing.T) {
function BenchmarkSum64 (line 46) | func BenchmarkSum64(b *testing.B) {
FILE: bloom_test.go
function TestSplitBlockFilter (line 13) | func TestSplitBlockFilter(t *testing.T) {
function BenchmarkSplitBlockFilter (line 157) | func BenchmarkSplitBlockFilter(b *testing.B) {
FILE: buffer.go
type Buffer (line 18) | type Buffer struct
method configure (line 59) | func (buf *Buffer) configure(schema *Schema) {
method Size (line 117) | func (buf *Buffer) Size() int64 {
method NumRows (line 126) | func (buf *Buffer) NumRows() int64 { return int64(buf.Len()) }
method ColumnChunks (line 129) | func (buf *Buffer) ColumnChunks() []ColumnChunk { return buf.chunks }
method ColumnBuffers (line 140) | func (buf *Buffer) ColumnBuffers() []ColumnBuffer { return buf.columns }
method Schema (line 146) | func (buf *Buffer) Schema() *Schema { return buf.schema }
method SortingColumns (line 153) | func (buf *Buffer) SortingColumns() []SortingColumn { return buf.confi...
method Len (line 156) | func (buf *Buffer) Len() int {
method Less (line 166) | func (buf *Buffer) Less(i, j int) bool {
method Swap (line 179) | func (buf *Buffer) Swap(i, j int) {
method Reset (line 186) | func (buf *Buffer) Reset() {
method Write (line 193) | func (buf *Buffer) Write(row interface{}) error {
method WriteRows (line 207) | func (buf *Buffer) WriteRows(rows []Row) (int, error) {
method WriteRowGroup (line 240) | func (buf *Buffer) WriteRowGroup(rowGroup RowGroup) (int64, error) {
method Rows (line 264) | func (buf *Buffer) Rows() Rows { return newRowGroupRows(buf, ReadModeS...
function NewBuffer (line 45) | func NewBuffer(options ...RowGroupOption) *Buffer {
type bufferWriter (line 269) | type bufferWriter struct
method WriteRows (line 271) | func (w bufferWriter) WriteRows(rows []Row) (int, error) {
method WriteValues (line 275) | func (w bufferWriter) WriteValues(values []Value) (int, error) {
method WritePage (line 279) | func (w bufferWriter) WritePage(page Page) (int64, error) {
type buffer (line 293) | type buffer struct
method refCount (line 300) | func (b *buffer) refCount() int {
method ref (line 304) | func (b *buffer) ref() {
method unref (line 308) | func (b *buffer) unref() {
function monitorBufferRelease (line 316) | func monitorBufferRelease(b *buffer) {
type bufferPool (line 322) | type bufferPool struct
method newBuffer (line 338) | func (p *bufferPool) newBuffer(bufferSize, bucketSize int) *buffer {
method get (line 353) | func (p *bufferPool) get(bufferSize int) *buffer {
method put (line 374) | func (p *bufferPool) put(b *buffer) {
constant bufferPoolBucketCount (line 387) | bufferPoolBucketCount = 32
constant bufferPoolMinSize (line 388) | bufferPoolMinSize = 4096
constant bufferPoolLastShortBucketSize (line 389) | bufferPoolLastShortBucketSize = 262144
function bufferPoolNextSize (line 392) | func bufferPoolNextSize(size int) int {
function bufferPoolBucketIndexAndSizeOfGet (line 400) | func bufferPoolBucketIndexAndSizeOfGet(size int) (int, int) {
function bufferPoolBucketIndexAndSizeOfPut (line 413) | func bufferPoolBucketIndexAndSizeOfPut(size int) (int, int) {
type bufferedPage (line 434) | type bufferedPage struct
method Slice (line 457) | func (p *bufferedPage) Slice(i, j int64) Page {
method Retain (line 467) | func (p *bufferedPage) Retain() {
method Release (line 474) | func (p *bufferedPage) Release() {
function newBufferedPage (line 442) | func newBufferedPage(page Page, values, offsets, definitionLevels, repet...
function bufferRef (line 481) | func bufferRef(buf *buffer) {
function bufferUnref (line 487) | func bufferUnref(buf *buffer) {
function Retain (line 507) | func Retain(page Page) {
function Release (line 529) | func Release(page Page) {
type retainable (line 535) | type retainable interface
type releasable (line 539) | type releasable interface
FILE: buffer_go18.go
type GenericBuffer (line 14) | type GenericBuffer struct
function NewGenericBuffer (line 30) | func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] {
function typeOf (line 53) | func typeOf[T any]() reflect.Type {
type bufferFunc (line 58) | type bufferFunc
function bufferFuncOf (line 60) | func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
function makeBufferFunc (line 79) | func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] {
method Size (line 90) | func (buf *GenericBuffer[T]) Size() int64 {
method NumRows (line 94) | func (buf *GenericBuffer[T]) NumRows() int64 {
method ColumnChunks (line 98) | func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk {
method ColumnBuffers (line 102) | func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer {
method SortingColumns (line 106) | func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn {
method Len (line 110) | func (buf *GenericBuffer[T]) Len() int {
method Less (line 114) | func (buf *GenericBuffer[T]) Less(i, j int) bool {
method Swap (line 118) | func (buf *GenericBuffer[T]) Swap(i, j int) {
method Reset (line 122) | func (buf *GenericBuffer[T]) Reset() {
method Write (line 126) | func (buf *GenericBuffer[T]) Write(rows []T) (int, error) {
method WriteRows (line 133) | func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) {
method WriteRowGroup (line 137) | func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, er...
method Rows (line 141) | func (buf *GenericBuffer[T]) Rows() Rows {
method Schema (line 145) | func (buf *GenericBuffer[T]) Schema() *Schema {
method writeRows (line 149) | func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) {
FILE: buffer_go18_test.go
function TestGenericBuffer (line 18) | func TestGenericBuffer(t *testing.T) {
function testGenericBuffer (line 47) | func testGenericBuffer[Row any](t *testing.T) {
function testGenericBufferRows (line 66) | func testGenericBufferRows[Row any](rows []Row) error {
function setNullPointers (line 88) | func setNullPointers[Row any](rows []Row) {
type generator (line 99) | type generator interface
function BenchmarkGenericBuffer (line 103) | func BenchmarkGenericBuffer(b *testing.B) {
function benchmarkGenericBuffer (line 125) | func benchmarkGenericBuffer[Row generator[Row]](b *testing.B) {
function TestIssue327 (line 175) | func TestIssue327(t *testing.T) {
function TestIssue346 (line 191) | func TestIssue346(t *testing.T) {
function TestIssue347 (line 204) | func TestIssue347(t *testing.T) {
function BenchmarkSortGenericBuffer (line 225) | func BenchmarkSortGenericBuffer(b *testing.B) {
FILE: buffer_internal_test.go
function TestBufferAlwaysCorrectSize (line 9) | func TestBufferAlwaysCorrectSize(t *testing.T) {
function TestBufferPoolBucketIndexAndSizeOf (line 21) | func TestBufferPoolBucketIndexAndSizeOf(t *testing.T) {
FILE: buffer_pool.go
type BufferPool (line 24) | type BufferPool interface
function NewBufferPool (line 42) | func NewBufferPool() BufferPool { return new(memoryBufferPool) }
type memoryBuffer (line 44) | type memoryBuffer struct
method Reset (line 49) | func (p *memoryBuffer) Reset() {
method Read (line 53) | func (p *memoryBuffer) Read(b []byte) (n int, err error) {
method Write (line 62) | func (p *memoryBuffer) Write(b []byte) (int, error) {
method WriteTo (line 74) | func (p *memoryBuffer) WriteTo(w io.Writer) (int64, error) {
method Seek (line 80) | func (p *memoryBuffer) Seek(offset int64, whence int) (int64, error) {
type memoryBufferPool (line 97) | type memoryBufferPool struct
method GetBuffer (line 99) | func (pool *memoryBufferPool) GetBuffer() io.ReadWriteSeeker {
method PutBuffer (line 109) | func (pool *memoryBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
type fileBufferPool (line 115) | type fileBufferPool struct
method GetBuffer (line 131) | func (pool *fileBufferPool) GetBuffer() io.ReadWriteSeeker {
method PutBuffer (line 142) | func (pool *fileBufferPool) PutBuffer(buf io.ReadWriteSeeker) {
function NewFileBufferPool (line 122) | func NewFileBufferPool(tempdir, pattern string) BufferPool {
type errorBuffer (line 149) | type errorBuffer struct
method Read (line 151) | func (buf *errorBuffer) Read([]byte) (int, error) { return 0,...
method Write (line 152) | func (buf *errorBuffer) Write([]byte) (int, error) { return 0,...
method ReadFrom (line 153) | func (buf *errorBuffer) ReadFrom(io.Reader) (int64, error) { return 0,...
method WriteTo (line 154) | func (buf *errorBuffer) WriteTo(io.Writer) (int64, error) { return 0,...
method Seek (line 155) | func (buf *errorBuffer) Seek(int64, int) (int64, error) { return 0,...
type readerAt (line 166) | type readerAt struct
method ReadAt (line 171) | func (r *readerAt) ReadAt(b []byte, off int64) (int, error) {
function newReaderAt (line 184) | func newReaderAt(r io.ReadSeeker) io.ReaderAt {
FILE: buffer_pool_test.go
function TestBufferPool (line 13) | func TestBufferPool(t *testing.T) {
function TestFileBufferPool (line 17) | func TestFileBufferPool(t *testing.T) {
function testBufferPool (line 21) | func testBufferPool(t *testing.T, pool parquet.BufferPool) {
function testBufferPoolWriteBytes (line 52) | func testBufferPoolWriteBytes(t *testing.T, pool parquet.BufferPool) {
function testBufferPoolWriteString (line 65) | func testBufferPoolWriteString(t *testing.T, pool parquet.BufferPool) {
function testBufferPoolCopyToBuffer (line 79) | func testBufferPoolCopyToBuffer(t *testing.T, pool parquet.BufferPool) {
function testBufferPoolCopyFromBuffer (line 94) | func testBufferPoolCopyFromBuffer(t *testing.T, pool parquet.BufferPool) {
function assertBufferContent (line 116) | func assertBufferContent(t *testing.T, b io.ReadSeeker, s string) {
FILE: buffer_test.go
function TestBuffer (line 173) | func TestBuffer(t *testing.T) {
type sortFunc (line 240) | type sortFunc
function unordered (line 242) | func unordered(typ parquet.Type, values []parquet.Value) {}
function ascending (line 244) | func ascending(typ parquet.Type, values []parquet.Value) {
function descending (line 248) | func descending(typ parquet.Type, values []parquet.Value) {
function testBuffer (line 252) | func testBuffer(t *testing.T, node parquet.Node, buffer *parquet.Buffer,...
function TestBufferGenerateBloomFilters (line 357) | func TestBufferGenerateBloomFilters(t *testing.T) {
function TestBufferRoundtripNestedRepeated (line 439) | func TestBufferRoundtripNestedRepeated(t *testing.T) {
function TestBufferRoundtripNestedRepeatedPointer (line 485) | func TestBufferRoundtripNestedRepeatedPointer(t *testing.T) {
function TestRoundtripNestedRepeatedBytes (line 529) | func TestRoundtripNestedRepeatedBytes(t *testing.T) {
function TestBufferSeekToRow (line 574) | func TestBufferSeekToRow(t *testing.T) {
type TestStruct (line 618) | type TestStruct struct
function TestOptionalDictWriteRowGroup (line 622) | func TestOptionalDictWriteRowGroup(t *testing.T) {
function TestNullsSortFirst (line 652) | func TestNullsSortFirst(t *testing.T) {
function generateBenchmarkBufferRows (line 695) | func generateBenchmarkBufferRows(n int) (*parquet.Schema, []parquet.Row) {
function BenchmarkBufferReadRows100x (line 711) | func BenchmarkBufferReadRows100x(b *testing.B) {
function BenchmarkBufferWriteRows100x (line 739) | func BenchmarkBufferWriteRows100x(b *testing.B) {
FILE: column.go
type Column (line 22) | type Column struct
method Type (line 44) | func (c *Column) Type() Type { return c.typ }
method Optional (line 47) | func (c *Column) Optional() bool { return schemaRepetitionTypeOf(c.sch...
method Repeated (line 50) | func (c *Column) Repeated() bool { return schemaRepetitionTypeOf(c.sch...
method Required (line 53) | func (c *Column) Required() bool { return schemaRepetitionTypeOf(c.sch...
method Leaf (line 56) | func (c *Column) Leaf() bool { return c.index >= 0 }
method Fields (line 59) | func (c *Column) Fields() []Field {
method Encoding (line 68) | func (c *Column) Encoding() encoding.Encoding { return c.encoding }
method Compression (line 71) | func (c *Column) Compression() compress.Codec { return c.compression }
method Path (line 74) | func (c *Column) Path() []string { return c.path[1:] }
method Name (line 77) | func (c *Column) Name() string { return c.schema.Name }
method Columns (line 83) | func (c *Column) Columns() []*Column { return c.columns }
method Column (line 86) | func (c *Column) Column(name string) *Column {
method Pages (line 96) | func (c *Column) Pages() Pages {
method Depth (line 164) | func (c *Column) Depth() int { return int(c.depth) }
method MaxRepetitionLevel (line 168) | func (c *Column) MaxRepetitionLevel() int { return int(c.maxRepetition...
method MaxDefinitionLevel (line 172) | func (c *Column) MaxDefinitionLevel() int { return int(c.maxDefinition...
method Index (line 176) | func (c *Column) Index() int { return int(c.index) }
method GoType (line 179) | func (c *Column) GoType() reflect.Type { return goTypeOf(c) }
method Value (line 183) | func (c *Column) Value(base reflect.Value) reflect.Value {
method String (line 188) | func (c *Column) String() string { return c.path.String() + ": " + spr...
method forEachLeaf (line 190) | func (c *Column) forEachLeaf(do func(*Column)) {
method setLevels (line 222) | func (c *Column) setLevels(depth, repetition, definition, index int) (...
method decompress (line 541) | func (c *Column) decompress(compressedPageData []byte, uncompressedPag...
method DecodeDataPageV1 (line 553) | func (c *Column) DecodeDataPageV1(header DataPageHeaderV1, page []byte...
method decodeDataPageV1 (line 557) | func (c *Column) decodeDataPageV1(header DataPageHeaderV1, page *buffe...
method DecodeDataPageV2 (line 600) | func (c *Column) DecodeDataPageV2(header DataPageHeaderV2, page []byte...
method decodeDataPageV2 (line 604) | func (c *Column) decodeDataPageV2(header DataPageHeaderV2, page *buffe...
method decodeDataPage (line 658) | func (c *Column) decodeDataPage(header DataPageHeader, numValues int, ...
method DecodeDictionary (line 762) | func (c *Column) DecodeDictionary(header DictionaryPageHeader, page []...
method decodeDictionary (line 766) | func (c *Column) decodeDictionary(header DictionaryPageHeader, page *b...
type columnPages (line 109) | type columnPages struct
method ReadPage (line 114) | func (c *columnPages) ReadPage() (Page, error) {
method SeekToRow (line 127) | func (c *columnPages) SeekToRow(rowIndex int64) error {
method Close (line 149) | func (c *columnPages) Close() error {
function openColumns (line 200) | func openColumns(file *File) (*Column, error) {
type columnLoader (line 265) | type columnLoader struct
method open (line 271) | func (cl *columnLoader) open(file *File, path []string) (*Column, erro...
function schemaElementTypeOf (line 368) | func schemaElementTypeOf(s *format.SchemaElement) Type {
function schemaRepetitionTypeOf (line 534) | func schemaRepetitionTypeOf(s *format.SchemaElement) format.FieldRepetit...
function decodeLevelsV1 (line 718) | func decodeLevelsV1(enc encoding.Encoding, numValues int, data []byte) (...
function decodeLevelsV2 (line 731) | func decodeLevelsV2(enc encoding.Encoding, numValues int, data []byte, l...
function decodeLevels (line 736) | func decodeLevels(enc encoding.Encoding, numValues int, data []byte) (le...
function skipLevelsV2 (line 753) | func skipLevelsV2(data []byte, length int64) ([]byte, error) {
FILE: column_buffer.go
type ColumnBuffer (line 27) | type ColumnBuffer interface
type columnLevels (line 84) | type columnLevels struct
function columnIndexOfNullable (line 90) | func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, d...
type nullableColumnIndex (line 98) | type nullableColumnIndex struct
method NullPage (line 104) | func (index *nullableColumnIndex) NullPage(i int) bool {
method NullCount (line 108) | func (index *nullableColumnIndex) NullCount(i int) int64 {
type nullOrdering (line 112) | type nullOrdering
function nullsGoFirst (line 114) | func nullsGoFirst(column ColumnBuffer, i, j int, maxDefinitionLevel, def...
function nullsGoLast (line 122) | func nullsGoLast(column ColumnBuffer, i, j int, maxDefinitionLevel, defi...
type reversedColumnBuffer (line 131) | type reversedColumnBuffer struct
method Less (line 133) | func (col *reversedColumnBuffer) Less(i, j int) bool { return col.Colu...
type optionalColumnBuffer (line 146) | type optionalColumnBuffer struct
method Clone (line 167) | func (col *optionalColumnBuffer) Clone() ColumnBuffer {
method Type (line 178) | func (col *optionalColumnBuffer) Type() Type {
method NumValues (line 182) | func (col *optionalColumnBuffer) NumValues() int64 {
method ColumnIndex (line 186) | func (col *optionalColumnBuffer) ColumnIndex() ColumnIndex {
method OffsetIndex (line 190) | func (col *optionalColumnBuffer) OffsetIndex() OffsetIndex {
method BloomFilter (line 194) | func (col *optionalColumnBuffer) BloomFilter() BloomFilter {
method Dictionary (line 198) | func (col *optionalColumnBuffer) Dictionary() Dictionary {
method Column (line 202) | func (col *optionalColumnBuffer) Column() int {
method Pages (line 206) | func (col *optionalColumnBuffer) Pages() Pages {
method Page (line 210) | func (col *optionalColumnBuffer) Page() Page {
method Reset (line 254) | func (col *optionalColumnBuffer) Reset() {
method Size (line 260) | func (col *optionalColumnBuffer) Size() int64 {
method Cap (line 264) | func (col *optionalColumnBuffer) Cap() int { return cap(col.rows) }
method Len (line 266) | func (col *optionalColumnBuffer) Len() int { return len(col.rows) }
method Less (line 268) | func (col *optionalColumnBuffer) Less(i, j int) bool {
method Swap (line 279) | func (col *optionalColumnBuffer) Swap(i, j int) {
method WriteValues (line 289) | func (col *optionalColumnBuffer) WriteValues(values []Value) (n int, e...
method writeValues (line 335) | func (col *optionalColumnBuffer) writeValues(rows sparse.Array, levels...
method ReadValuesAt (line 366) | func (col *optionalColumnBuffer) ReadValuesAt(values []Value, offset i...
function newOptionalColumnBuffer (line 156) | func newOptionalColumnBuffer(base ColumnBuffer, maxDefinitionLevel byte,...
type repeatedColumnBuffer (line 421) | type repeatedColumnBuffer struct
method Clone (line 455) | func (col *repeatedColumnBuffer) Clone() ColumnBuffer {
method Type (line 468) | func (col *repeatedColumnBuffer) Type() Type {
method NumValues (line 472) | func (col *repeatedColumnBuffer) NumValues() int64 {
method ColumnIndex (line 476) | func (col *repeatedColumnBuffer) ColumnIndex() ColumnIndex {
method OffsetIndex (line 480) | func (col *repeatedColumnBuffer) OffsetIndex() OffsetIndex {
method BloomFilter (line 484) | func (col *repeatedColumnBuffer) BloomFilter() BloomFilter {
method Dictionary (line 488) | func (col *repeatedColumnBuffer) Dictionary() Dictionary {
method Column (line 492) | func (col *repeatedColumnBuffer) Column() int {
method Pages (line 496) | func (col *repeatedColumnBuffer) Pages() Pages {
method Page (line 500) | func (col *repeatedColumnBuffer) Page() Page {
method swapReorderingBuffer (line 562) | func (col *repeatedColumnBuffer) swapReorderingBuffer(buf *repeatedCol...
method Reset (line 569) | func (col *repeatedColumnBuffer) Reset() {
method Size (line 576) | func (col *repeatedColumnBuffer) Size() int64 {
method Cap (line 580) | func (col *repeatedColumnBuffer) Cap() int { return cap(col.rows) }
method Len (line 582) | func (col *repeatedColumnBuffer) Len() int { return len(col.rows) }
method Less (line 584) | func (col *repeatedColumnBuffer) Less(i, j int) bool {
method Swap (line 607) | func (col *repeatedColumnBuffer) Swap(i, j int) {
method WriteValues (line 618) | func (col *repeatedColumnBuffer) WriteValues(values []Value) (numValue...
method writeRow (line 650) | func (col *repeatedColumnBuffer) writeRow(row []Value) error {
method writeValues (line 681) | func (col *repeatedColumnBuffer) writeValues(row sparse.Array, levels ...
method ReadValuesAt (line 703) | func (col *repeatedColumnBuffer) ReadValuesAt(values []Value, offset i...
type offsetMapping (line 437) | type offsetMapping struct
function newRepeatedColumnBuffer (line 442) | func newRepeatedColumnBuffer(base ColumnBuffer, maxRepetitionLevel, maxD...
function repeatedRowLength (line 710) | func repeatedRowLength(repetitionLevels []byte) int {
type booleanColumnBuffer (line 735) | type booleanColumnBuffer struct
method Clone (line 749) | func (col *booleanColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 761) | func (col *booleanColumnBuffer) ColumnIndex() ColumnIndex {
method OffsetIndex (line 765) | func (col *booleanColumnBuffer) OffsetIndex() OffsetIndex {
method BloomFilter (line 769) | func (col *booleanColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 771) | func (col *booleanColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 773) | func (col *booleanColumnBuffer) Pages() Pages { return onePage(col.Pag...
method Page (line 775) | func (col *booleanColumnBuffer) Page() Page { return &col.booleanPage }
method Reset (line 777) | func (col *booleanColumnBuffer) Reset() {
method Cap (line 783) | func (col *booleanColumnBuffer) Cap() int { return 8 * cap(col.bits) }
method Len (line 785) | func (col *booleanColumnBuffer) Len() int { return int(col.numValues) }
method Less (line 787) | func (col *booleanColumnBuffer) Less(i, j int) bool {
method valueAt (line 793) | func (col *booleanColumnBuffer) valueAt(i int) bool {
method setValueAt (line 799) | func (col *booleanColumnBuffer) setValueAt(i int, v bool) {
method Swap (line 810) | func (col *booleanColumnBuffer) Swap(i, j int) {
method WriteBooleans (line 817) | func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, err...
method WriteValues (line 822) | func (col *booleanColumnBuffer) WriteValues(values []Value) (int, erro...
method writeValues (line 828) | func (col *booleanColumnBuffer) writeValues(rows sparse.Array, _ colum...
method ReadValuesAt (line 879) | func (col *booleanColumnBuffer) ReadValuesAt(values []Value, offset in...
function newBooleanColumnBuffer (line 737) | func newBooleanColumnBuffer(typ Type, columnIndex int16, numValues int32...
type int32ColumnBuffer (line 899) | type int32ColumnBuffer struct
method Clone (line 911) | func (col *int32ColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 921) | func (col *int32ColumnBuffer) ColumnIndex() ColumnIndex { return int32...
method OffsetIndex (line 923) | func (col *int32ColumnBuffer) OffsetIndex() OffsetIndex { return int32...
method BloomFilter (line 925) | func (col *int32ColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 927) | func (col *int32ColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 929) | func (col *int32ColumnBuffer) Pages() Pages { return onePage(col.Page(...
method Page (line 931) | func (col *int32ColumnBuffer) Page() Page { return &col.int32Page }
method Reset (line 933) | func (col *int32ColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 935) | func (col *int32ColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 937) | func (col *int32ColumnBuffer) Len() int { return len(col.values) }
method Less (line 939) | func (col *int32ColumnBuffer) Less(i, j int) bool { return col.values[...
method Swap (line 941) | func (col *int32ColumnBuffer) Swap(i, j int) {
method Write (line 945) | func (col *int32ColumnBuffer) Write(b []byte) (int, error) {
method WriteInt32s (line 953) | func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) {
method WriteValues (line 958) | func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 964) | func (col *int32ColumnBuffer) writeValues(rows sparse.Array, _ columnL...
method ReadValuesAt (line 974) | func (col *int32ColumnBuffer) ReadValuesAt(values []Value, offset int6...
function newInt32ColumnBuffer (line 901) | func newInt32ColumnBuffer(typ Type, columnIndex int16, numValues int32) ...
type int64ColumnBuffer (line 994) | type int64ColumnBuffer struct
method Clone (line 1006) | func (col *int64ColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1016) | func (col *int64ColumnBuffer) ColumnIndex() ColumnIndex { return int64...
method OffsetIndex (line 1018) | func (col *int64ColumnBuffer) OffsetIndex() OffsetIndex { return int64...
method BloomFilter (line 1020) | func (col *int64ColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1022) | func (col *int64ColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1024) | func (col *int64ColumnBuffer) Pages() Pages { return onePage(col.Page(...
method Page (line 1026) | func (col *int64ColumnBuffer) Page() Page { return &col.int64Page }
method Reset (line 1028) | func (col *int64ColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1030) | func (col *int64ColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1032) | func (col *int64ColumnBuffer) Len() int { return len(col.values) }
method Less (line 1034) | func (col *int64ColumnBuffer) Less(i, j int) bool { return col.values[...
method Swap (line 1036) | func (col *int64ColumnBuffer) Swap(i, j int) {
method Write (line 1040) | func (col *int64ColumnBuffer) Write(b []byte) (int, error) {
method WriteInt64s (line 1048) | func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) {
method WriteValues (line 1053) | func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1059) | func (col *int64ColumnBuffer) writeValues(rows sparse.Array, _ columnL...
method ReadValuesAt (line 1068) | func (col *int64ColumnBuffer) ReadValuesAt(values []Value, offset int6...
function newInt64ColumnBuffer (line 996) | func newInt64ColumnBuffer(typ Type, columnIndex int16, numValues int32) ...
type int96ColumnBuffer (line 1088) | type int96ColumnBuffer struct
method Clone (line 1100) | func (col *int96ColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1110) | func (col *int96ColumnBuffer) ColumnIndex() ColumnIndex { return int96...
method OffsetIndex (line 1112) | func (col *int96ColumnBuffer) OffsetIndex() OffsetIndex { return int96...
method BloomFilter (line 1114) | func (col *int96ColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1116) | func (col *int96ColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1118) | func (col *int96ColumnBuffer) Pages() Pages { return onePage(col.Page(...
method Page (line 1120) | func (col *int96ColumnBuffer) Page() Page { return &col.int96Page }
method Reset (line 1122) | func (col *int96ColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1124) | func (col *int96ColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1126) | func (col *int96ColumnBuffer) Len() int { return len(col.values) }
method Less (line 1128) | func (col *int96ColumnBuffer) Less(i, j int) bool { return col.values[...
method Swap (line 1130) | func (col *int96ColumnBuffer) Swap(i, j int) {
method Write (line 1134) | func (col *int96ColumnBuffer) Write(b []byte) (int, error) {
method WriteInt96s (line 1142) | func (col *int96ColumnBuffer) WriteInt96s(values []deprecated.Int96) (...
method WriteValues (line 1147) | func (col *int96ColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1154) | func (col *int96ColumnBuffer) writeValues(rows sparse.Array, _ columnL...
method ReadValuesAt (line 1161) | func (col *int96ColumnBuffer) ReadValuesAt(values []Value, offset int6...
function newInt96ColumnBuffer (line 1090) | func newInt96ColumnBuffer(typ Type, columnIndex int16, numValues int32) ...
type floatColumnBuffer (line 1181) | type floatColumnBuffer struct
method Clone (line 1193) | func (col *floatColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1203) | func (col *floatColumnBuffer) ColumnIndex() ColumnIndex { return float...
method OffsetIndex (line 1205) | func (col *floatColumnBuffer) OffsetIndex() OffsetIndex { return float...
method BloomFilter (line 1207) | func (col *floatColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1209) | func (col *floatColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1211) | func (col *floatColumnBuffer) Pages() Pages { return onePage(col.Page(...
method Page (line 1213) | func (col *floatColumnBuffer) Page() Page { return &col.floatPage }
method Reset (line 1215) | func (col *floatColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1217) | func (col *floatColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1219) | func (col *floatColumnBuffer) Len() int { return len(col.values) }
method Less (line 1221) | func (col *floatColumnBuffer) Less(i, j int) bool { return col.values[...
method Swap (line 1223) | func (col *floatColumnBuffer) Swap(i, j int) {
method Write (line 1227) | func (col *floatColumnBuffer) Write(b []byte) (int, error) {
method WriteFloats (line 1235) | func (col *floatColumnBuffer) WriteFloats(values []float32) (int, erro...
method WriteValues (line 1240) | func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1246) | func (col *floatColumnBuffer) writeValues(rows sparse.Array, _ columnL...
method ReadValuesAt (line 1255) | func (col *floatColumnBuffer) ReadValuesAt(values []Value, offset int6...
function newFloatColumnBuffer (line 1183) | func newFloatColumnBuffer(typ Type, columnIndex int16, numValues int32) ...
type doubleColumnBuffer (line 1275) | type doubleColumnBuffer struct
method Clone (line 1287) | func (col *doubleColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1297) | func (col *doubleColumnBuffer) ColumnIndex() ColumnIndex { return doub...
method OffsetIndex (line 1299) | func (col *doubleColumnBuffer) OffsetIndex() OffsetIndex { return doub...
method BloomFilter (line 1301) | func (col *doubleColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1303) | func (col *doubleColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1305) | func (col *doubleColumnBuffer) Pages() Pages { return onePage(col.Page...
method Page (line 1307) | func (col *doubleColumnBuffer) Page() Page { return &col.doublePage }
method Reset (line 1309) | func (col *doubleColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1311) | func (col *doubleColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1313) | func (col *doubleColumnBuffer) Len() int { return len(col.values) }
method Less (line 1315) | func (col *doubleColumnBuffer) Less(i, j int) bool { return col.values...
method Swap (line 1317) | func (col *doubleColumnBuffer) Swap(i, j int) {
method Write (line 1321) | func (col *doubleColumnBuffer) Write(b []byte) (int, error) {
method WriteDoubles (line 1329) | func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, er...
method WriteValues (line 1334) | func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1340) | func (col *doubleColumnBuffer) writeValues(rows sparse.Array, _ column...
method ReadValuesAt (line 1349) | func (col *doubleColumnBuffer) ReadValuesAt(values []Value, offset int...
function newDoubleColumnBuffer (line 1277) | func newDoubleColumnBuffer(typ Type, columnIndex int16, numValues int32)...
type byteArrayColumnBuffer (line 1369) | type byteArrayColumnBuffer struct
method Clone (line 1387) | func (col *byteArrayColumnBuffer) Clone() ColumnBuffer {
method cloneLengths (line 1399) | func (col *byteArrayColumnBuffer) cloneLengths() []uint32 {
method ColumnIndex (line 1405) | func (col *byteArrayColumnBuffer) ColumnIndex() ColumnIndex {
method OffsetIndex (line 1409) | func (col *byteArrayColumnBuffer) OffsetIndex() OffsetIndex {
method BloomFilter (line 1413) | func (col *byteArrayColumnBuffer) BloomFilter() BloomFilter { return n...
method Dictionary (line 1415) | func (col *byteArrayColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1417) | func (col *byteArrayColumnBuffer) Pages() Pages { return onePage(col.P...
method Page (line 1419) | func (col *byteArrayColumnBuffer) Page() Page {
method Reset (line 1442) | func (col *byteArrayColumnBuffer) Reset() {
method NumRows (line 1448) | func (col *byteArrayColumnBuffer) NumRows() int64 { return int64(col.L...
method NumValues (line 1450) | func (col *byteArrayColumnBuffer) NumValues() int64 { return int64(col...
method Cap (line 1452) | func (col *byteArrayColumnBuffer) Cap() int { return cap(col.lengths) }
method Len (line 1454) | func (col *byteArrayColumnBuffer) Len() int { return len(col.lengths) }
method Less (line 1456) | func (col *byteArrayColumnBuffer) Less(i, j int) bool {
method Swap (line 1460) | func (col *byteArrayColumnBuffer) Swap(i, j int) {
method Write (line 1465) | func (col *byteArrayColumnBuffer) Write(b []byte) (int, error) {
method WriteByteArrays (line 1470) | func (col *byteArrayColumnBuffer) WriteByteArrays(values []byte) (int,...
method writeByteArrays (line 1475) | func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (coun...
method WriteValues (line 1489) | func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, er...
method writeValues (line 1495) | func (col *byteArrayColumnBuffer) writeValues(rows sparse.Array, _ col...
method ReadValuesAt (line 1502) | func (col *byteArrayColumnBuffer) ReadValuesAt(values []Value, offset ...
method append (line 1522) | func (col *byteArrayColumnBuffer) append(value string) {
method index (line 1528) | func (col *byteArrayColumnBuffer) index(i int) []byte {
function newByteArrayColumnBuffer (line 1375) | func newByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int...
type fixedLenByteArrayColumnBuffer (line 1535) | type fixedLenByteArrayColumnBuffer struct
method Clone (line 1553) | func (col *fixedLenByteArrayColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1565) | func (col *fixedLenByteArrayColumnBuffer) ColumnIndex() ColumnIndex {
method OffsetIndex (line 1569) | func (col *fixedLenByteArrayColumnBuffer) OffsetIndex() OffsetIndex {
method BloomFilter (line 1573) | func (col *fixedLenByteArrayColumnBuffer) BloomFilter() BloomFilter { ...
method Dictionary (line 1575) | func (col *fixedLenByteArrayColumnBuffer) Dictionary() Dictionary { re...
method Pages (line 1577) | func (col *fixedLenByteArrayColumnBuffer) Pages() Pages { return onePa...
method Page (line 1579) | func (col *fixedLenByteArrayColumnBuffer) Page() Page { return &col.fi...
method Reset (line 1581) | func (col *fixedLenByteArrayColumnBuffer) Reset() { col.data = col.dat...
method Cap (line 1583) | func (col *fixedLenByteArrayColumnBuffer) Cap() int { return cap(col.d...
method Len (line 1585) | func (col *fixedLenByteArrayColumnBuffer) Len() int { return len(col.d...
method Less (line 1587) | func (col *fixedLenByteArrayColumnBuffer) Less(i, j int) bool {
method Swap (line 1591) | func (col *fixedLenByteArrayColumnBuffer) Swap(i, j int) {
method index (line 1598) | func (col *fixedLenByteArrayColumnBuffer) index(i int) []byte {
method Write (line 1604) | func (col *fixedLenByteArrayColumnBuffer) Write(b []byte) (int, error) {
method WriteFixedLenByteArrays (line 1609) | func (col *fixedLenByteArrayColumnBuffer) WriteFixedLenByteArrays(valu...
method WriteValues (line 1618) | func (col *fixedLenByteArrayColumnBuffer) WriteValues(values []Value) ...
method writeValues (line 1625) | func (col *fixedLenByteArrayColumnBuffer) writeValues(rows sparse.Arra...
method ReadValuesAt (line 1643) | func (col *fixedLenByteArrayColumnBuffer) ReadValuesAt(values []Value,...
function newFixedLenByteArrayColumnBuffer (line 1540) | func newFixedLenByteArrayColumnBuffer(typ Type, columnIndex int16, numVa...
type uint32ColumnBuffer (line 1663) | type uint32ColumnBuffer struct
method Clone (line 1675) | func (col *uint32ColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1685) | func (col *uint32ColumnBuffer) ColumnIndex() ColumnIndex { return uint...
method OffsetIndex (line 1687) | func (col *uint32ColumnBuffer) OffsetIndex() OffsetIndex { return uint...
method BloomFilter (line 1689) | func (col *uint32ColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1691) | func (col *uint32ColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1693) | func (col *uint32ColumnBuffer) Pages() Pages { return onePage(col.Page...
method Page (line 1695) | func (col *uint32ColumnBuffer) Page() Page { return &col.uint32Page }
method Reset (line 1697) | func (col *uint32ColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1699) | func (col *uint32ColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1701) | func (col *uint32ColumnBuffer) Len() int { return len(col.values) }
method Less (line 1703) | func (col *uint32ColumnBuffer) Less(i, j int) bool { return col.values...
method Swap (line 1705) | func (col *uint32ColumnBuffer) Swap(i, j int) {
method Write (line 1709) | func (col *uint32ColumnBuffer) Write(b []byte) (int, error) {
method WriteUint32s (line 1717) | func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, err...
method WriteValues (line 1722) | func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1728) | func (col *uint32ColumnBuffer) writeValues(rows sparse.Array, _ column...
method ReadValuesAt (line 1737) | func (col *uint32ColumnBuffer) ReadValuesAt(values []Value, offset int...
function newUint32ColumnBuffer (line 1665) | func newUint32ColumnBuffer(typ Type, columnIndex int16, numValues int32)...
type uint64ColumnBuffer (line 1757) | type uint64ColumnBuffer struct
method Clone (line 1769) | func (col *uint64ColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1779) | func (col *uint64ColumnBuffer) ColumnIndex() ColumnIndex { return uint...
method OffsetIndex (line 1781) | func (col *uint64ColumnBuffer) OffsetIndex() OffsetIndex { return uint...
method BloomFilter (line 1783) | func (col *uint64ColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1785) | func (col *uint64ColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1787) | func (col *uint64ColumnBuffer) Pages() Pages { return onePage(col.Page...
method Page (line 1789) | func (col *uint64ColumnBuffer) Page() Page { return &col.uint64Page }
method Reset (line 1791) | func (col *uint64ColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1793) | func (col *uint64ColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1795) | func (col *uint64ColumnBuffer) Len() int { return len(col.values) }
method Less (line 1797) | func (col *uint64ColumnBuffer) Less(i, j int) bool { return col.values...
method Swap (line 1799) | func (col *uint64ColumnBuffer) Swap(i, j int) {
method Write (line 1803) | func (col *uint64ColumnBuffer) Write(b []byte) (int, error) {
method WriteUint64s (line 1811) | func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, err...
method WriteValues (line 1816) | func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1822) | func (col *uint64ColumnBuffer) writeValues(rows sparse.Array, _ column...
method ReadValuesAt (line 1831) | func (col *uint64ColumnBuffer) ReadValuesAt(values []Value, offset int...
function newUint64ColumnBuffer (line 1759) | func newUint64ColumnBuffer(typ Type, columnIndex int16, numValues int32)...
type be128ColumnBuffer (line 1851) | type be128ColumnBuffer struct
method Clone (line 1863) | func (col *be128ColumnBuffer) Clone() ColumnBuffer {
method ColumnIndex (line 1873) | func (col *be128ColumnBuffer) ColumnIndex() ColumnIndex {
method OffsetIndex (line 1877) | func (col *be128ColumnBuffer) OffsetIndex() OffsetIndex {
method BloomFilter (line 1881) | func (col *be128ColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1883) | func (col *be128ColumnBuffer) Dictionary() Dictionary { return nil }
method Pages (line 1885) | func (col *be128ColumnBuffer) Pages() Pages { return onePage(col.Page(...
method Page (line 1887) | func (col *be128ColumnBuffer) Page() Page { return &col.be128Page }
method Reset (line 1889) | func (col *be128ColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1891) | func (col *be128ColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1893) | func (col *be128ColumnBuffer) Len() int { return len(col.values) }
method Less (line 1895) | func (col *be128ColumnBuffer) Less(i, j int) bool {
method Swap (line 1899) | func (col *be128ColumnBuffer) Swap(i, j int) {
method WriteValues (line 1903) | func (col *be128ColumnBuffer) WriteValues(values []Value) (int, error) {
method writeValues (line 1916) | func (col *be128ColumnBuffer) writeValues(rows sparse.Array, _ columnL...
method ReadValuesAt (line 1925) | func (col *be128ColumnBuffer) ReadValuesAt(values []Value, offset int6...
function newBE128ColumnBuffer (line 1853) | func newBE128ColumnBuffer(typ Type, columnIndex int16, numValues int32) ...
FILE: column_buffer_amd64.go
function broadcastValueInt32 (line 12) | func broadcastValueInt32(dst []int32, src int8) {
function broadcastRangeInt32AVX2 (line 17) | func broadcastRangeInt32AVX2(dst []int32, base int32)
function broadcastRangeInt32 (line 19) | func broadcastRangeInt32(dst []int32, base int32) {
function writePointersBE128 (line 30) | func writePointersBE128(values [][16]byte, rows sparse.Array)
FILE: column_buffer_go18.go
type writeRowsFunc (line 26) | type writeRowsFunc
function writeRowsFuncOf (line 31) | func writeRowsFuncOf(t reflect.Type, schema *Schema, path columnPath) wr...
function writeRowsFuncOfRequired (line 81) | func writeRowsFuncOfRequired(t reflect.Type, schema *Schema, path column...
function writeRowsFuncOfOptional (line 90) | func writeRowsFuncOfOptional(t reflect.Type, schema *Schema, path column...
function writeRowsFuncOfPointer (line 189) | func writeRowsFuncOfPointer(t reflect.Type, schema *Schema, path columnP...
function writeRowsFuncOfSlice (line 241) | func writeRowsFuncOfSlice(t reflect.Type, schema *Schema, path columnPat...
function writeRowsFuncOfStruct (line 289) | func writeRowsFuncOfStruct(t reflect.Type, schema *Schema, path columnPa...
function writeRowsFuncOfMap (line 343) | func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath)...
function writeRowsFuncOfJSON (line 405) | func writeRowsFuncOfJSON(t reflect.Type, schema *Schema, path columnPath...
function writeRowsFuncOfTime (line 443) | func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath...
FILE: column_buffer_purego.go
function broadcastValueInt32 (line 7) | func broadcastValueInt32(dst []int32, src int8) {
function broadcastRangeInt32 (line 14) | func broadcastRangeInt32(dst []int32, base int32) {
function writePointersBE128 (line 20) | func writePointersBE128(values [][16]byte, rows sparse.Array) {
FILE: column_buffer_test.go
function TestBroadcastValueInt32 (line 7) | func TestBroadcastValueInt32(t *testing.T) {
function TestBroadcastRangeInt32 (line 18) | func TestBroadcastRangeInt32(t *testing.T) {
function BenchmarkBroadcastValueInt32 (line 29) | func BenchmarkBroadcastValueInt32(b *testing.B) {
function BenchmarkBroadcastRangeInt32 (line 37) | func BenchmarkBroadcastRangeInt32(b *testing.B) {
function TestIssue501 (line 46) | func TestIssue501(t *testing.T) {
FILE: column_chunk.go
type ColumnChunk (line 8) | type ColumnChunk interface
type pageAndValueWriter (line 37) | type pageAndValueWriter interface
type readRowsFunc (line 42) | type readRowsFunc
function readRowsFuncOf (line 44) | func readRowsFuncOf(node Node, columnIndex int, repetitionDepth byte) (i...
function readRowsFuncOfRepeated (line 65) | func readRowsFuncOfRepeated(read readRowsFunc, repetitionDepth byte) rea...
function readRowsFuncOfGroup (line 116) | func readRowsFuncOfGroup(node Node, columnIndex int, repetitionDepth byt...
function readRowsFuncOfLeaf (line 162) | func readRowsFuncOfLeaf(columnIndex int, repetitionDepth byte) (int, rea...
FILE: column_index.go
type ColumnIndex (line 10) | type ColumnIndex interface
function NewColumnIndex (line 38) | func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex {
type formatColumnIndex (line 45) | type formatColumnIndex struct
method NumPages (line 50) | func (f *formatColumnIndex) NumPages() int {
method NullCount (line 54) | func (f *formatColumnIndex) NullCount(i int) int64 {
method NullPage (line 61) | func (f *formatColumnIndex) NullPage(i int) bool {
method MinValue (line 65) | func (f *formatColumnIndex) MinValue(i int) Value {
method MaxValue (line 72) | func (f *formatColumnIndex) MaxValue(i int) Value {
method IsAscending (line 79) | func (f *formatColumnIndex) IsAscending() bool {
method IsDescending (line 83) | func (f *formatColumnIndex) IsDescending() bool {
type fileColumnIndex (line 87) | type fileColumnIndex struct
method NumPages (line 89) | func (i fileColumnIndex) NumPages() int {
method NullCount (line 93) | func (i fileColumnIndex) NullCount(j int) int64 {
method NullPage (line 100) | func (i fileColumnIndex) NullPage(j int) bool {
method MinValue (line 104) | func (i fileColumnIndex) MinValue(j int) Value {
method MaxValue (line 111) | func (i fileColumnIndex) MaxValue(j int) Value {
method IsAscending (line 118) | func (i fileColumnIndex) IsAscending() bool {
method IsDescending (line 122) | func (i fileColumnIndex) IsDescending() bool {
method makeValue (line 126) | func (i *fileColumnIndex) makeValue(b []byte) Value {
type emptyColumnIndex (line 130) | type emptyColumnIndex struct
method NumPages (line 132) | func (emptyColumnIndex) NumPages() int { return 0 }
method NullCount (line 133) | func (emptyColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 134) | func (emptyColumnIndex) NullPage(int) bool { return false }
method MinValue (line 135) | func (emptyColumnIndex) MinValue(int) Value { return Value{} }
method MaxValue (line 136) | func (emptyColumnIndex) MaxValue(int) Value { return Value{} }
method IsAscending (line 137) | func (emptyColumnIndex) IsAscending() bool { return false }
method IsDescending (line 138) | func (emptyColumnIndex) IsDescending() bool { return false }
type booleanColumnIndex (line 140) | type booleanColumnIndex struct
method NumPages (line 142) | func (i booleanColumnIndex) NumPages() int { return 1 }
method NullCount (line 143) | func (i booleanColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 144) | func (i booleanColumnIndex) NullPage(int) bool { return false }
method MinValue (line 145) | func (i booleanColumnIndex) MinValue(int) Value { return makeValueBoo...
method MaxValue (line 146) | func (i booleanColumnIndex) MaxValue(int) Value { return makeValueBoo...
method IsAscending (line 147) | func (i booleanColumnIndex) IsAscending() bool { return false }
method IsDescending (line 148) | func (i booleanColumnIndex) IsDescending() bool { return false }
type int32ColumnIndex (line 150) | type int32ColumnIndex struct
method NumPages (line 152) | func (i int32ColumnIndex) NumPages() int { return 1 }
method NullCount (line 153) | func (i int32ColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 154) | func (i int32ColumnIndex) NullPage(int) bool { return false }
method MinValue (line 155) | func (i int32ColumnIndex) MinValue(int) Value { return makeValueInt32...
method MaxValue (line 156) | func (i int32ColumnIndex) MaxValue(int) Value { return makeValueInt32...
method IsAscending (line 157) | func (i int32ColumnIndex) IsAscending() bool { return false }
method IsDescending (line 158) | func (i int32ColumnIndex) IsDescending() bool { return false }
type int64ColumnIndex (line 160) | type int64ColumnIndex struct
method NumPages (line 162) | func (i int64ColumnIndex) NumPages() int { return 1 }
method NullCount (line 163) | func (i int64ColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 164) | func (i int64ColumnIndex) NullPage(int) bool { return false }
method MinValue (line 165) | func (i int64ColumnIndex) MinValue(int) Value { return makeValueInt64...
method MaxValue (line 166) | func (i int64ColumnIndex) MaxValue(int) Value { return makeValueInt64...
method IsAscending (line 167) | func (i int64ColumnIndex) IsAscending() bool { return false }
method IsDescending (line 168) | func (i int64ColumnIndex) IsDescending() bool { return false }
type int96ColumnIndex (line 170) | type int96ColumnIndex struct
method NumPages (line 172) | func (i int96ColumnIndex) NumPages() int { return 1 }
method NullCount (line 173) | func (i int96ColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 174) | func (i int96ColumnIndex) NullPage(int) bool { return false }
method MinValue (line 175) | func (i int96ColumnIndex) MinValue(int) Value { return makeValueInt96...
method MaxValue (line 176) | func (i int96ColumnIndex) MaxValue(int) Value { return makeValueInt96...
method IsAscending (line 177) | func (i int96ColumnIndex) IsAscending() bool { return false }
method IsDescending (line 178) | func (i int96ColumnIndex) IsDescending() bool { return false }
type floatColumnIndex (line 180) | type floatColumnIndex struct
method NumPages (line 182) | func (i floatColumnIndex) NumPages() int { return 1 }
method NullCount (line 183) | func (i floatColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 184) | func (i floatColumnIndex) NullPage(int) bool { return false }
method MinValue (line 185) | func (i floatColumnIndex) MinValue(int) Value { return makeValueFloat...
method MaxValue (line 186) | func (i floatColumnIndex) MaxValue(int) Value { return makeValueFloat...
method IsAscending (line 187) | func (i floatColumnIndex) IsAscending() bool { return false }
method IsDescending (line 188) | func (i floatColumnIndex) IsDescending() bool { return false }
type doubleColumnIndex (line 190) | type doubleColumnIndex struct
method NumPages (line 192) | func (i doubleColumnIndex) NumPages() int { return 1 }
method NullCount (line 193) | func (i doubleColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 194) | func (i doubleColumnIndex) NullPage(int) bool { return false }
method MinValue (line 195) | func (i doubleColumnIndex) MinValue(int) Value { return makeValueDoub...
method MaxValue (line 196) | func (i doubleColumnIndex) MaxValue(int) Value { return makeValueDoub...
method IsAscending (line 197) | func (i doubleColumnIndex) IsAscending() bool { return false }
method IsDescending (line 198) | func (i doubleColumnIndex) IsDescending() bool { return false }
type byteArrayColumnIndex (line 200) | type byteArrayColumnIndex struct
method NumPages (line 202) | func (i byteArrayColumnIndex) NumPages() int { return 1 }
method NullCount (line 203) | func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 204) | func (i byteArrayColumnIndex) NullPage(int) bool { return false }
method MinValue (line 205) | func (i byteArrayColumnIndex) MinValue(int) Value { return makeValueB...
method MaxValue (line 206) | func (i byteArrayColumnIndex) MaxValue(int) Value { return makeValueB...
method IsAscending (line 207) | func (i byteArrayColumnIndex) IsAscending() bool { return false }
method IsDescending (line 208) | func (i byteArrayColumnIndex) IsDescending() bool { return false }
type fixedLenByteArrayColumnIndex (line 210) | type fixedLenByteArrayColumnIndex struct
method NumPages (line 212) | func (i fixedLenByteArrayColumnIndex) NumPages() int { return 1 }
method NullCount (line 213) | func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 214) | func (i fixedLenByteArrayColumnIndex) NullPage(int) bool { return fa...
method MinValue (line 215) | func (i fixedLenByteArrayColumnIndex) MinValue(int) Value {
method MaxValue (line 218) | func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value {
method IsAscending (line 221) | func (i fixedLenByteArrayColumnIndex) IsAscending() bool { return fal...
method IsDescending (line 222) | func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return fal...
type uint32ColumnIndex (line 224) | type uint32ColumnIndex struct
method NumPages (line 226) | func (i uint32ColumnIndex) NumPages() int { return 1 }
method NullCount (line 227) | func (i uint32ColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 228) | func (i uint32ColumnIndex) NullPage(int) bool { return false }
method MinValue (line 229) | func (i uint32ColumnIndex) MinValue(int) Value { return makeValueUint...
method MaxValue (line 230) | func (i uint32ColumnIndex) MaxValue(int) Value { return makeValueUint...
method IsAscending (line 231) | func (i uint32ColumnIndex) IsAscending() bool { return false }
method IsDescending (line 232) | func (i uint32ColumnIndex) IsDescending() bool { return false }
type uint64ColumnIndex (line 234) | type uint64ColumnIndex struct
method NumPages (line 236) | func (i uint64ColumnIndex) NumPages() int { return 1 }
method NullCount (line 237) | func (i uint64ColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 238) | func (i uint64ColumnIndex) NullPage(int) bool { return false }
method MinValue (line 239) | func (i uint64ColumnIndex) MinValue(int) Value { return makeValueUint...
method MaxValue (line 240) | func (i uint64ColumnIndex) MaxValue(int) Value { return makeValueUint...
method IsAscending (line 241) | func (i uint64ColumnIndex) IsAscending() bool { return false }
method IsDescending (line 242) | func (i uint64ColumnIndex) IsDescending() bool { return false }
type be128ColumnIndex (line 244) | type be128ColumnIndex struct
method NumPages (line 246) | func (i be128ColumnIndex) NumPages() int { return 1 }
method NullCount (line 247) | func (i be128ColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 248) | func (i be128ColumnIndex) NullPage(int) bool { return false }
method MinValue (line 249) | func (i be128ColumnIndex) MinValue(int) Value { return makeValueBytes...
method MaxValue (line 250) | func (i be128ColumnIndex) MaxValue(int) Value { return makeValueBytes...
method IsAscending (line 251) | func (i be128ColumnIndex) IsAscending() bool { return false }
method IsDescending (line 252) | func (i be128ColumnIndex) IsDescending() bool { return false }
type ColumnIndexer (line 259) | type ColumnIndexer interface
type baseColumnIndexer (line 275) | type baseColumnIndexer struct
method reset (line 280) | func (i *baseColumnIndexer) reset() {
method observe (line 285) | func (i *baseColumnIndexer) observe(numValues, numNulls int64) {
method columnIndex (line 290) | func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte,...
type booleanColumnIndexer (line 300) | type booleanColumnIndexer struct
method Reset (line 310) | func (i *booleanColumnIndexer) Reset() {
method IndexPage (line 316) | func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, mi...
method ColumnIndex (line 322) | func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex {
function newBooleanColumnIndexer (line 306) | func newBooleanColumnIndexer() *booleanColumnIndexer {
type int32ColumnIndexer (line 331) | type int32ColumnIndexer struct
method Reset (line 341) | func (i *int32ColumnIndexer) Reset() {
method IndexPage (line 347) | func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min,...
method ColumnIndex (line 353) | func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex {
function newInt32ColumnIndexer (line 337) | func newInt32ColumnIndexer() *int32ColumnIndexer {
type int64ColumnIndexer (line 362) | type int64ColumnIndexer struct
method Reset (line 372) | func (i *int64ColumnIndexer) Reset() {
method IndexPage (line 378) | func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min,...
method ColumnIndex (line 384) | func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex {
function newInt64ColumnIndexer (line 368) | func newInt64ColumnIndexer() *int64ColumnIndexer {
type int96ColumnIndexer (line 393) | type int96ColumnIndexer struct
method Reset (line 403) | func (i *int96ColumnIndexer) Reset() {
method IndexPage (line 409) | func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min,...
method ColumnIndex (line 415) | func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex {
function newInt96ColumnIndexer (line 399) | func newInt96ColumnIndexer() *int96ColumnIndexer {
type floatColumnIndexer (line 424) | type floatColumnIndexer struct
method Reset (line 434) | func (i *floatColumnIndexer) Reset() {
method IndexPage (line 440) | func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min,...
method ColumnIndex (line 446) | func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex {
function newFloatColumnIndexer (line 430) | func newFloatColumnIndexer() *floatColumnIndexer {
type doubleColumnIndexer (line 455) | type doubleColumnIndexer struct
method Reset (line 465) | func (i *doubleColumnIndexer) Reset() {
method IndexPage (line 471) | func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min...
method ColumnIndex (line 477) | func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex {
function newDoubleColumnIndexer (line 461) | func newDoubleColumnIndexer() *doubleColumnIndexer {
type byteArrayColumnIndexer (line 486) | type byteArrayColumnIndexer struct
method Reset (line 497) | func (i *byteArrayColumnIndexer) Reset() {
method IndexPage (line 503) | func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, ...
method ColumnIndex (line 509) | func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
function newByteArrayColumnIndexer (line 493) | func newByteArrayColumnIndexer(sizeLimit int) *byteArrayColumnIndexer {
type fixedLenByteArrayColumnIndexer (line 528) | type fixedLenByteArrayColumnIndexer struct
method Reset (line 543) | func (i *fixedLenByteArrayColumnIndexer) Reset() {
method IndexPage (line 549) | func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls...
method ColumnIndex (line 555) | func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIn...
function newFixedLenByteArrayColumnIndexer (line 536) | func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByt...
type uint32ColumnIndexer (line 574) | type uint32ColumnIndexer struct
method Reset (line 584) | func (i *uint32ColumnIndexer) Reset() {
method IndexPage (line 590) | func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min...
method ColumnIndex (line 596) | func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex {
function newUint32ColumnIndexer (line 580) | func newUint32ColumnIndexer() *uint32ColumnIndexer {
type uint64ColumnIndexer (line 605) | type uint64ColumnIndexer struct
method Reset (line 615) | func (i *uint64ColumnIndexer) Reset() {
method IndexPage (line 621) | func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min...
method ColumnIndex (line 627) | func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex {
function newUint64ColumnIndexer (line 611) | func newUint64ColumnIndexer() *uint64ColumnIndexer {
type be128ColumnIndexer (line 636) | type be128ColumnIndexer struct
method Reset (line 646) | func (i *be128ColumnIndexer) Reset() {
method IndexPage (line 652) | func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min,...
method ColumnIndex (line 662) | func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex {
function newBE128ColumnIndexer (line 642) | func newBE128ColumnIndexer() *be128ColumnIndexer {
function truncateLargeMinByteArrayValue (line 673) | func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte {
function truncateLargeMaxByteArrayValue (line 682) | func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte {
function incrementByteArrayInplace (line 692) | func incrementByteArrayInplace(value []byte) {
function splitByteArrays (line 705) | func splitByteArrays(data []byte) [][]byte {
function splitFixedLenByteArrays (line 722) | func splitFixedLenByteArrays(data []byte, size int) [][]byte {
function boundaryOrderOf (line 733) | func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder {
FILE: column_index_internal_test.go
function TestIncrementByteArrayInplace (line 8) | func TestIncrementByteArrayInplace(t *testing.T) {
FILE: column_index_test.go
function TestBinaryColumnIndexMinMax (line 9) | func TestBinaryColumnIndexMinMax(t *testing.T) {
FILE: column_mapping.go
type LeafColumn (line 4) | type LeafColumn struct
function columnMappingOf (line 12) | func columnMappingOf(schema Node) (mapping columnMappingGroup, columns [...
type columnMapping (line 39) | type columnMapping interface
type columnMappingGroup (line 43) | type columnMappingGroup
method lookup (line 45) | func (group columnMappingGroup) lookup(path columnPath) leafColumn {
method lookupClosest (line 55) | func (group columnMappingGroup) lookupClosest(path columnPath) leafCol...
type columnMappingLeaf (line 79) | type columnMappingLeaf struct
method lookup (line 83) | func (leaf *columnMappingLeaf) lookup(path columnPath) leafColumn {
FILE: column_mapping_test.go
function ExampleSchema_Lookup (line 10) | func ExampleSchema_Lookup() {
FILE: column_path.go
type columnPath (line 7) | type columnPath
method append (line 9) | func (path columnPath) append(names ...string) columnPath {
method equal (line 13) | func (path columnPath) equal(other columnPath) bool {
method less (line 17) | func (path columnPath) less(other columnPath) bool {
method String (line 21) | func (path columnPath) String() string {
function stringsAreEqual (line 25) | func stringsAreEqual(strings1, strings2 []string) bool {
function stringsAreOrdered (line 39) | func stringsAreOrdered(strings1, strings2 []string) bool {
type leafColumn (line 55) | type leafColumn struct
function forEachLeafColumnOf (line 63) | func forEachLeafColumnOf(node Node, do func(leafColumn)) {
function forEachLeafColumn (line 67) | func forEachLeafColumn(node Node, path columnPath, columnIndex, maxRepet...
function lookupColumnPath (line 101) | func lookupColumnPath(node Node, path columnPath) Node {
function hasColumnPath (line 109) | func hasColumnPath(node Node, path columnPath) bool {
FILE: column_test.go
function TestColumnPageIndex (line 14) | func TestColumnPageIndex(t *testing.T) {
function testColumnPageIndexWithBuffer (line 113) | func testColumnPageIndexWithBuffer(t *testing.T, rows rows) bool {
function checkRowGroupColumnIndex (line 131) | func checkRowGroupColumnIndex(rowGroup parquet.RowGroup) error {
function checkColumnChunkColumnIndex (line 140) | func checkColumnChunkColumnIndex(columnChunk parquet.ColumnChunk) error {
function checkRowGroupOffsetIndex (line 218) | func checkRowGroupOffsetIndex(rowGroup parquet.RowGroup) error {
function checkColumnChunkOffsetIndex (line 227) | func checkColumnChunkOffsetIndex(columnChunk parquet.ColumnChunk) error {
function testColumnPageIndexWithFile (line 255) | func testColumnPageIndexWithFile(t *testing.T, rows rows) bool {
function checkFileColumnIndex (line 288) | func checkFileColumnIndex(f *parquet.File) error {
function checkFileOffsetIndex (line 355) | func checkFileOffsetIndex(f *parquet.File) error {
type fileColumnIndex (line 401) | type fileColumnIndex struct
method NumPages (line 406) | func (i *fileColumnIndex) NumPages() int { return len(i...
method NullCount (line 407) | func (i *fileColumnIndex) NullCount(j int) int64 { return i.Nul...
method NullPage (line 408) | func (i *fileColumnIndex) NullPage(j int) bool { return i.Nul...
method MinValue (line 409) | func (i *fileColumnIndex) MinValue(j int) parquet.Value { return i.kin...
method MaxValue (line 410) | func (i *fileColumnIndex) MaxValue(j int) parquet.Value { return i.kin...
method IsAscending (line 411) | func (i *fileColumnIndex) IsAscending() bool { return i.Bou...
method IsDescending (line 412) | func (i *fileColumnIndex) IsDescending() bool { return i.Bou...
type fileOffsetIndex (line 414) | type fileOffsetIndex
method NumPages (line 416) | func (i *fileOffsetIndex) NumPages() int { return len(i.PageLocat...
method Offset (line 417) | func (i *fileOffsetIndex) Offset(j int) int64 { return i.PageLocations...
method CompressedPageSize (line 418) | func (i *fileOffsetIndex) CompressedPageSize(j int) int64 {
method FirstRowIndex (line 421) | func (i *fileOffsetIndex) FirstRowIndex(j int) int64 { return i.PageLo...
type columnStats (line 423) | type columnStats struct
method observe (line 434) | func (c *columnStats) observe(value parquet.Value) {
method pageRead (line 448) | func (c *columnStats) pageRead() {
function newColumnStats (line 430) | func newColumnStats(columnType parquet.Type) *columnStats {
type indexOrder (line 452) | type indexOrder
method String (line 461) | func (o indexOrder) String() string {
constant invalidIndexOrder (line 455) | invalidIndexOrder indexOrder = iota
constant unorderedIndexOrder (line 456) | unorderedIndexOrder
constant ascendingIndexOrder (line 457) | ascendingIndexOrder
constant descendingIndexOrder (line 458) | descendingIndexOrder
function columnIndexOrder (line 474) | func columnIndexOrder(index parquet.ColumnIndex) indexOrder {
function observedIndexOrder (line 487) | func observedIndexOrder(columnType parquet.Type, minValues []parquet.Val...
function valueOrder (line 501) | func valueOrder(columnType parquet.Type, values []parquet.Value) indexOr...
FILE: compare.go
function CompareDescending (line 13) | func CompareDescending(cmp func(Value, Value) int) func(Value, Value) int {
function CompareNullsFirst (line 21) | func CompareNullsFirst(cmp func(Value, Value) int) func(Value, Value) int {
function CompareNullsLast (line 41) | func CompareNullsLast(cmp func(Value, Value) int) func(Value, Value) int {
function compareBool (line 57) | func compareBool(v1, v2 bool) int {
function compareInt32 (line 68) | func compareInt32(v1, v2 int32) int {
function compareInt64 (line 79) | func compareInt64(v1, v2 int64) int {
function compareInt96 (line 90) | func compareInt96(v1, v2 deprecated.Int96) int {
function compareFloat32 (line 101) | func compareFloat32(v1, v2 float32) int {
function compareFloat64 (line 112) | func compareFloat64(v1, v2 float64) int {
function compareUint32 (line 123) | func compareUint32(v1, v2 uint32) int {
function compareUint64 (line 134) | func compareUint64(v1, v2 uint64) int {
function compareBE128 (line 145) | func compareBE128(v1, v2 *[16]byte) int {
function lessBE128 (line 166) | func lessBE128(v1, v2 *[16]byte) bool {
function compareRowsFuncOf (line 180) | func compareRowsFuncOf(schema *Schema, sortingColumns []SortingColumn) f...
function compareRowsUnordered (line 212) | func compareRowsUnordered(Row, Row) int { return 0 }
function compareRowsFuncOfIndexColumns (line 215) | func compareRowsFuncOfIndexColumns(compareFuncs []func(Row, Row) int) fu...
function compareRowsFuncOfIndexAscending (line 227) | func compareRowsFuncOfIndexAscending(columnIndex int16, typ Type) func(R...
function compareRowsFuncOfIndexDescending (line 232) | func compareRowsFuncOfIndexDescending(columnIndex int16, typ Type) func(...
function compareRowsFuncOfColumnIndexes (line 237) | func compareRowsFuncOfColumnIndexes(leafColumns []leafColumn, sortingCol...
function compareRowsFuncOfColumnValues (line 262) | func compareRowsFuncOfColumnValues(leafColumns []leafColumn, sortingColu...
FILE: compare_test.go
function assertCompare (line 5) | func assertCompare(t *testing.T, a, b Value, cmp func(Value, Value) int,...
function TestCompareNullsFirst (line 11) | func TestCompareNullsFirst(t *testing.T) {
function TestCompareNullsLast (line 19) | func TestCompareNullsLast(t *testing.T) {
function BenchmarkCompareBE128 (line 27) | func BenchmarkCompareBE128(b *testing.B) {
function BenchmarkLessBE128 (line 36) | func BenchmarkLessBE128(b *testing.B) {
FILE: compress.go
function LookupCompressionCodec (line 61) | func LookupCompressionCodec(codec format.CompressionCodec) compress.Codec {
type unsupported (line 70) | type unsupported struct
method String (line 74) | func (u *unsupported) String() string {
method CompressionCodec (line 78) | func (u *unsupported) CompressionCodec() format.CompressionCodec {
method Encode (line 82) | func (u *unsupported) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 86) | func (u *unsupported) Decode(dst, src []byte) ([]byte, error) {
method error (line 90) | func (u *unsupported) error() error {
function isCompressed (line 94) | func isCompressed(c compress.Codec) bool {
FILE: compress/brotli/brotli.go
constant DefaultQuality (line 13) | DefaultQuality = 0
constant DefaultLGWin (line 14) | DefaultLGWin = 0
type Codec (line 17) | type Codec struct
method String (line 29) | func (c *Codec) String() string {
method CompressionCodec (line 33) | func (c *Codec) CompressionCodec() format.CompressionCodec {
method Encode (line 37) | func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 46) | func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
type reader (line 52) | type reader struct
method Close (line 54) | func (reader) Close() error { return nil }
FILE: compress/compress.go
type Codec (line 19) | type Codec interface
type Reader (line 39) | type Reader interface
type Writer (line 44) | type Writer interface
type Compressor (line 49) | type Compressor struct
method Encode (line 58) | func (c *Compressor) Encode(dst, src []byte, newWriter func(io.Writer)...
type writer (line 53) | type writer struct
type Decompressor (line 87) | type Decompressor struct
method Decode (line 96) | func (d *Decompressor) Decode(dst, src []byte, newReader func(io.Reade...
type reader (line 91) | type reader struct
FILE: compress/compress_test.go
function TestCompressionCodec (line 54) | func TestCompressionCodec(t *testing.T) {
function BenchmarkEncode (line 84) | func BenchmarkEncode(b *testing.B) {
function BenchmarkDecode (line 97) | func BenchmarkDecode(b *testing.B) {
type simpleReader (line 112) | type simpleReader struct
method Close (line 114) | func (s *simpleReader) Close() error { return nil }
method Reset (line 115) | func (s *simpleReader) Reset(r io.Reader) error { s.Reader = r; return...
type simpleWriter (line 117) | type simpleWriter struct
method Close (line 119) | func (s *simpleWriter) Close() error { return nil }
method Reset (line 120) | func (s *simpleWriter) Reset(w io.Writer) { s.Writer = w }
function BenchmarkCompressor (line 122) | func BenchmarkCompressor(b *testing.B) {
function BenchmarkDecompressor (line 134) | func BenchmarkDecompressor(b *testing.B) {
function benchmarkZeroAllocsPerRun (line 146) | func benchmarkZeroAllocsPerRun(b *testing.B, f func()) {
FILE: compress/gzip/gzip.go
constant emptyGzip (line 14) | emptyGzip = "\x1f\x8b\b\x00\x00\x00\x00\x00\x02\xff\x01\x00\x00\xff\xff\...
constant NoCompression (line 18) | NoCompression = gzip.NoCompression
constant BestSpeed (line 19) | BestSpeed = gzip.BestSpeed
constant BestCompression (line 20) | BestCompression = gzip.BestCompression
constant DefaultCompression (line 21) | DefaultCompression = gzip.DefaultCompression
constant HuffmanOnly (line 22) | HuffmanOnly = gzip.HuffmanOnly
type Codec (line 25) | type Codec struct
method String (line 32) | func (c *Codec) String() string {
method CompressionCodec (line 36) | func (c *Codec) CompressionCodec() format.CompressionCodec {
method Encode (line 40) | func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 46) | func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
type reader (line 56) | type reader struct
method Reset (line 61) | func (r *reader) Reset(rr io.Reader) error {
FILE: compress/lz4/lz4.go
constant Fast (line 12) | Fast = lz4.Fast
constant Level1 (line 13) | Level1 = lz4.Level1
constant Level2 (line 14) | Level2 = lz4.Level2
constant Level3 (line 15) | Level3 = lz4.Level3
constant Level4 (line 16) | Level4 = lz4.Level4
constant Level5 (line 17) | Level5 = lz4.Level5
constant Level6 (line 18) | Level6 = lz4.Level6
constant Level7 (line 19) | Level7 = lz4.Level7
constant Level8 (line 20) | Level8 = lz4.Level8
constant Level9 (line 21) | Level9 = lz4.Level9
constant DefaultLevel (line 25) | DefaultLevel = Fast
type Codec (line 28) | type Codec struct
method String (line 32) | func (c *Codec) String() string {
method CompressionCodec (line 36) | func (c *Codec) CompressionCodec() format.CompressionCodec {
method Encode (line 40) | func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 56) | func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
function reserveAtLeast (line 78) | func reserveAtLeast(b []byte, n int) []byte {
FILE: compress/snappy/snappy.go
type Codec (line 9) | type Codec struct
method String (line 17) | func (c *Codec) String() string {
method CompressionCodec (line 21) | func (c *Codec) CompressionCodec() format.CompressionCodec {
method Encode (line 25) | func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 29) | func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
FILE: compress/uncompressed/uncompressed.go
type Codec (line 10) | type Codec struct
method String (line 13) | func (c *Codec) String() string {
method CompressionCodec (line 17) | func (c *Codec) CompressionCodec() format.CompressionCodec {
method Encode (line 21) | func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 25) | func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
FILE: compress/zstd/zstd.go
constant SpeedFastest (line 16) | SpeedFastest = zstd.SpeedFastest
constant SpeedDefault (line 20) | SpeedDefault = zstd.SpeedDefault
constant SpeedBetterCompression (line 25) | SpeedBetterCompression = zstd.SpeedBetterCompression
constant SpeedBestCompression (line 29) | SpeedBestCompression = zstd.SpeedBestCompression
constant DefaultLevel (line 33) | DefaultLevel = SpeedDefault
type Codec (line 36) | type Codec struct
method String (line 43) | func (c *Codec) String() string {
method CompressionCodec (line 47) | func (c *Codec) CompressionCodec() format.CompressionCodec {
method Encode (line 51) | func (c *Codec) Encode(dst, src []byte) ([]byte, error) {
method Decode (line 69) | func (c *Codec) Decode(dst, src []byte) ([]byte, error) {
method level (line 84) | func (c *Codec) level() Level {
FILE: config.go
type ReadMode (line 14) | type ReadMode
constant ReadModeSync (line 17) | ReadModeSync ReadMode = iota
constant ReadModeAsync (line 18) | ReadModeAsync
constant DefaultColumnIndexSizeLimit (line 22) | DefaultColumnIndexSizeLimit = 16
constant DefaultColumnBufferCapacity (line 23) | DefaultColumnBufferCapacity = 16 * 1024
constant DefaultPageBufferSize (line 24) | DefaultPageBufferSize = 256 * 1024
constant DefaultWriteBufferSize (line 25) | DefaultWriteBufferSize = 32 * 1024
constant DefaultDataPageVersion (line 26) | DefaultDataPageVersion = 2
constant DefaultDataPageStatistics (line 27) | DefaultDataPageStatistics = false
constant DefaultSkipPageIndex (line 28) | DefaultSkipPageIndex = false
constant DefaultSkipBloomFilters (line 29) | DefaultSkipBloomFilters = false
constant DefaultMaxRowsPerRowGroup (line 30) | DefaultMaxRowsPerRowGroup = math.MaxInt64
constant DefaultReadMode (line 31) | DefaultReadMode = ReadModeSync
constant parquetGoModulePath (line 35) | parquetGoModulePath = "github.com/segmentio/parquet-go"
function defaultCreatedBy (line 43) | func defaultCreatedBy() string {
function parseModuleVersion (line 61) | func parseModuleVersion(version string) (semver, datetime, buildsha stri...
function splitModuleVersion (line 69) | func splitModuleVersion(s string) (head, tail string) {
function formatCreatedBy (line 78) | func formatCreatedBy(application, version, build string) string {
type FileConfig (line 92) | type FileConfig struct
method Apply (line 124) | func (c *FileConfig) Apply(options ...FileOption) {
method ConfigureFile (line 131) | func (c *FileConfig) ConfigureFile(config *FileConfig) {
method Validate (line 142) | func (c *FileConfig) Validate() error {
function DefaultFileConfig (line 102) | func DefaultFileConfig() *FileConfig {
function NewFileConfig (line 117) | func NewFileConfig(options ...FileOption) (*FileConfig, error) {
type ReaderConfig (line 154) | type ReaderConfig struct
method Apply (line 176) | func (c *ReaderConfig) Apply(options ...ReaderOption) {
method ConfigureReader (line 183) | func (c *ReaderConfig) ConfigureReader(config *ReaderConfig) {
method Validate (line 190) | func (c *ReaderConfig) Validate() error {
function DefaultReaderConfig (line 160) | func DefaultReaderConfig() *ReaderConfig {
function NewReaderConfig (line 169) | func NewReaderConfig(options ...ReaderOption) (*ReaderConfig, error) {
type WriterConfig (line 202) | type WriterConfig struct
method Apply (line 248) | func (c *WriterConfig) Apply(options ...WriterOption) {
method ConfigureWriter (line 255) | func (c *WriterConfig) ConfigureWriter(config *WriterConfig) {
method Validate (line 284) | func (c *WriterConfig) Validate() error {
function DefaultWriterConfig (line 220) | func DefaultWriterConfig() *WriterConfig {
function NewWriterConfig (line 241) | func NewWriterConfig(options ...WriterOption) (*WriterConfig, error) {
type RowGroupConfig (line 303) | type RowGroupConfig struct
method Validate (line 332) | func (c *RowGroupConfig) Validate() error {
method Apply (line 340) | func (c *RowGroupConfig) Apply(options ...RowGroupOption) {
method ConfigureRowGroup (line 346) | func (c *RowGroupConfig) ConfigureRowGroup(config *RowGroupConfig) {
function DefaultRowGroupConfig (line 311) | func DefaultRowGroupConfig() *RowGroupConfig {
function NewRowGroupConfig (line 325) | func NewRowGroupConfig(options ...RowGroupOption) (*RowGroupConfig, erro...
type SortingConfig (line 365) | type SortingConfig struct
method Validate (line 390) | func (c *SortingConfig) Validate() error {
method Apply (line 397) | func (c *SortingConfig) Apply(options ...SortingOption) {
method ConfigureSorting (line 403) | func (c *SortingConfig) ConfigureSorting(config *SortingConfig) {
function DefaultSortingConfig (line 373) | func DefaultSortingConfig() *SortingConfig {
function NewSortingConfig (line 384) | func NewSortingConfig(options ...SortingOption) (*SortingConfig, error) {
type FileOption (line 409) | type FileOption interface
type ReaderOption (line 415) | type ReaderOption interface
type WriterOption (line 421) | type WriterOption interface
type RowGroupOption (line 427) | type RowGroupOption interface
type SortingOption (line 433) | type SortingOption interface
function SkipPageIndex (line 443) | func SkipPageIndex(skip bool) FileOption {
function SkipBloomFilters (line 453) | func SkipBloomFilters(skip bool) FileOption {
function FileReadMode (line 464) | func FileReadMode(mode ReadMode) FileOption {
function ReadBufferSize (line 475) | func ReadBufferSize(size int) FileOption {
function FileSchema (line 484) | func FileSchema(schema *Schema) FileOption {
function PageBufferSize (line 497) | func PageBufferSize(size int) WriterOption {
function WriteBufferSize (line 507) | func WriteBufferSize(size int) WriterOption {
function MaxRowsPerRowGroup (line 520) | func MaxRowsPerRowGroup(numRows int64) WriterOption {
function CreatedBy (line 537) | func CreatedBy(application, version, build string) WriterOption {
function ColumnPageBuffers (line 548) | func ColumnPageBuffers(buffers BufferPool) WriterOption {
function ColumnIndexSizeLimit (line 556) | func ColumnIndexSizeLimit(sizeLimit int) WriterOption {
function DataPageVersion (line 564) | func DataPageVersion(version int) WriterOption {
function DataPageStatistics (line 574) | func DataPageStatistics(enabled bool) WriterOption {
function KeyValueMetadata (line 591) | func KeyValueMetadata(key, value string) WriterOption {
function BloomFilters (line 608) | func BloomFilters(filters ...BloomFilterColumn) WriterOption {
function Compression (line 615) | func Compression(codec compress.Codec) WriterOption {
function SortingWriterConfig (line 621) | func SortingWriterConfig(options ...SortingOption) WriterOption {
function ColumnBufferCapacity (line 630) | func ColumnBufferCapacity(size int) RowGroupOption {
function SortingRowGroupConfig (line 636) | func SortingRowGroupConfig(options ...SortingOption) RowGroupOption {
function SortingColumns (line 647) | func SortingColumns(columns ...SortingColumn) SortingOption {
function SortingBuffers (line 660) | func SortingBuffers(buffers BufferPool) SortingOption {
function DropDuplicatedRows (line 671) | func DropDuplicatedRows(drop bool) SortingOption {
type fileOption (line 675) | type fileOption
method ConfigureFile (line 677) | func (opt fileOption) ConfigureFile(config *FileConfig) { opt(config) }
type readerOption (line 679) | type readerOption
method ConfigureReader (line 681) | func (opt readerOption) ConfigureReader(config *ReaderConfig) { opt(co...
type writerOption (line 683) | type writerOption
method ConfigureWriter (line 685) | func (opt writerOption) ConfigureWriter(config *WriterConfig) { opt(co...
type rowGroupOption (line 687) | type rowGroupOption
method ConfigureRowGroup (line 689) | func (opt rowGroupOption) ConfigureRowGroup(config *RowGroupConfig) { ...
type sortingOption (line 691) | type sortingOption
method ConfigureSorting (line 693) | func (opt sortingOption) ConfigureSorting(config *SortingConfig) { opt...
function coalesceInt (line 695) | func coalesceInt(i1, i2 int) int {
function coalesceInt64 (line 702) | func coalesceInt64(i1, i2 int64) int64 {
function coalesceString (line 709) | func coalesceString(s1, s2 string) string {
function coalesceBytes (line 716) | func coalesceBytes(b1, b2 []byte) []byte {
function coalesceBufferPool (line 723) | func coalesceBufferPool(p1, p2 BufferPool) BufferPool {
function coalesceSchema (line 730) | func coalesceSchema(s1, s2 *Schema) *Schema {
function coalesceSortingColumns (line 737) | func coalesceSortingColumns(s1, s2 []SortingColumn) []SortingColumn {
function coalesceSortingConfig (line 744) | func coalesceSortingConfig(c1, c2 SortingConfig) SortingConfig {
function coalesceBloomFilters (line 752) | func coalesceBloomFilters(f1, f2 []BloomFilterColumn) []BloomFilterColumn {
function coalesceCompression (line 759) | func coalesceCompression(c1, c2 compress.Codec) compress.Codec {
function validatePositiveInt (line 766) | func validatePositiveInt(optionName string, optionValue int) error {
function validatePositiveInt64 (line 773) | func validatePositiveInt64(optionName string, optionValue int64) error {
function validateOneOfInt (line 780) | func validateOneOfInt(optionName string, optionValue int, supportedValue...
function validateNotNil (line 789) | func validateNotNil(optionName string, optionValue interface{}) error {
function errorInvalidOptionValue (line 796) | func errorInvalidOptionValue(optionName string, optionValue interface{})...
function errorInvalidConfiguration (line 800) | func errorInvalidConfiguration(reasons ...error) error {
type invalidConfiguration (line 819) | type invalidConfiguration struct
method Error (line 823) | func (err *invalidConfiguration) Error() string {
FILE: convert.go
type ConvertError (line 22) | type ConvertError struct
method Error (line 29) | func (e *ConvertError) Error() string {
type Conversion (line 49) | type Conversion interface
type conversion (line 60) | type conversion struct
method getBuffer (line 152) | func (c *conversion) getBuffer() *conversionBuffer {
method putBuffer (line 166) | func (c *conversion) putBuffer(b *conversionBuffer) {
method Convert (line 172) | func (c *conversion) Convert(rows []Row) (int, error) {
method Column (line 220) | func (c *conversion) Column(i int) int {
method Schema (line 224) | func (c *conversion) Schema() *Schema {
type conversionBuffer (line 69) | type conversionBuffer struct
type conversionColumn (line 73) | type conversionColumn struct
type conversionFunc (line 78) | type conversionFunc
function convertToSelf (line 80) | func convertToSelf(column []Value) error { return nil }
function convertToType (line 83) | func convertToType(targetType, sourceType Type) conversionFunc {
function convertToValue (line 99) | func convertToValue(value Value) conversionFunc {
function convertToZero (line 109) | func convertToZero(kind Kind) conversionFunc {
function convertToLevels (line 121) | func convertToLevels(repetitionLevels, definitionLevels []byte) conversi...
function multiConversionFunc (line 134) | func multiConversionFunc(conversions []conversionFunc) conversionFunc {
type identity (line 228) | type identity struct
method Convert (line 230) | func (id identity) Convert(rows []Row) (int, error) { return len(rows)...
method Column (line 231) | func (id identity) Column(i int) int { return i }
method Schema (line 232) | func (id identity) Schema() *Schema { return id.schema }
function Convert (line 243) | func Convert(to, from Node) (conv Conversion, err error) {
function isDirectLevelMapping (line 337) | func isDirectLevelMapping(levels []byte) bool {
function ConvertRowGroup (line 348) | func ConvertRowGroup(rowGroup RowGroup, conv Conversion) RowGroup {
function maskMissingRowGroupColumns (line 412) | func maskMissingRowGroupColumns(r RowGroup, numColumns int, conv Convers...
type missingColumnChunk (line 446) | type missingColumnChunk struct
method Type (line 454) | func (c *missingColumnChunk) Type() Type { return c.typ }
method Column (line 455) | func (c *missingColumnChunk) Column() int { return int(c....
method Pages (line 456) | func (c *missingColumnChunk) Pages() Pages { return onePag...
method ColumnIndex (line 457) | func (c *missingColumnChunk) ColumnIndex() ColumnIndex { return missin...
method OffsetIndex (line 458) | func (c *missingColumnChunk) OffsetIndex() OffsetIndex { return missin...
method BloomFilter (line 459) | func (c *missingColumnChunk) BloomFilter() BloomFilter { return missin...
method NumValues (line 460) | func (c *missingColumnChunk) NumValues() int64 { return 0 }
type missingColumnIndex (line 462) | type missingColumnIndex struct
method NumPages (line 464) | func (i missingColumnIndex) NumPages() int { return 1 }
method NullCount (line 465) | func (i missingColumnIndex) NullCount(int) int64 { return i.numNulls }
method NullPage (line 466) | func (i missingColumnIndex) NullPage(int) bool { return true }
method MinValue (line 467) | func (i missingColumnIndex) MinValue(int) Value { return Value{} }
method MaxValue (line 468) | func (i missingColumnIndex) MaxValue(int) Value { return Value{} }
method IsAscending (line 469) | func (i missingColumnIndex) IsAscending() bool { return true }
method IsDescending (line 470) | func (i missingColumnIndex) IsDescending() bool { return false }
type missingOffsetIndex (line 472) | type missingOffsetIndex struct
method NumPages (line 474) | func (missingOffsetIndex) NumPages() int { return 1 }
method Offset (line 475) | func (missingOffsetIndex) Offset(int) int64 { return 0 }
method CompressedPageSize (line 476) | func (missingOffsetIndex) CompressedPageSize(int) int64 { return 0 }
method FirstRowIndex (line 477) | func (missingOffsetIndex) FirstRowIndex(int) int64 { return 0 }
type missingBloomFilter (line 479) | type missingBloomFilter struct
method ReadAt (line 481) | func (missingBloomFilter) ReadAt([]byte, int64) (int, error) { return ...
method Size (line 482) | func (missingBloomFilter) Size() int64 { return 0 }
method Check (line 483) | func (missingBloomFilter) Check(Value) (bool, error) { return ...
type missingPage (line 485) | type missingPage struct
method Column (line 487) | func (p missingPage) Column() int { return int(p...
method Dictionary (line 488) | func (p missingPage) Dictionary() Dictionary { return nil }
method NumRows (line 489) | func (p missingPage) NumRows() int64 { return p.num...
method NumValues (line 490) | func (p missingPage) NumValues() int64 { return p.num...
method NumNulls (line 491) | func (p missingPage) NumNulls() int64 { return p.num...
method Bounds (line 492) | func (p missingPage) Bounds() (min, max Value, ok bool) { return }
method Slice (line 493) | func (p missingPage) Slice(i, j int64) Page { return p }
method Size (line 494) | func (p missingPage) Size() int64 { return 0 }
method RepetitionLevels (line 495) | func (p missingPage) RepetitionLevels() []byte { return nil }
method DefinitionLevels (line 496) | func (p missingPage) DefinitionLevels() []byte { return nil }
method Data (line 497) | func (p missingPage) Data() encoding.Values { return p.typ...
method Values (line 498) | func (p missingPage) Values() ValueReader { return &miss...
type missingPageValues (line 500) | type missingPageValues struct
method ReadValues (line 505) | func (r *missingPageValues) ReadValues(values []Value) (int, error) {
method Close (line 520) | func (r *missingPageValues) Close() error {
type convertedRowGroup (line 525) | type convertedRowGroup struct
method NumRows (line 532) | func (c *convertedRowGroup) NumRows() int64 { return ...
method ColumnChunks (line 533) | func (c *convertedRowGroup) ColumnChunks() []ColumnChunk { return ...
method Schema (line 534) | func (c *convertedRowGroup) Schema() *Schema { return ...
method SortingColumns (line 535) | func (c *convertedRowGroup) SortingColumns() []SortingColumn { return ...
method Rows (line 536) | func (c *convertedRowGroup) Rows() Rows {
function ConvertRowReader (line 547) | func ConvertRowReader(rows RowReader, conv Conversion) RowReaderWithSche...
type convertedRows (line 551) | type convertedRows struct
method ReadRows (line 557) | func (c *convertedRows) ReadRows(rows []Row) (int, error) {
method Schema (line 569) | func (c *convertedRows) Schema() *Schema {
method SeekToRow (line 573) | func (c *convertedRows) SeekToRow(rowIndex int64) error {
function convertBooleanToInt32 (line 583) | func convertBooleanToInt32(v Value) (Value, error) {
function convertBooleanToInt64 (line 587) | func convertBooleanToInt64(v Value) (Value, error) {
function convertBooleanToInt96 (line 591) | func convertBooleanToInt96(v Value) (Value, error) {
function convertBooleanToFloat (line 595) | func convertBooleanToFloat(v Value) (Value, error) {
function convertBooleanToDouble (line 599) | func convertBooleanToDouble(v Value) (Value, error) {
function convertBooleanToByteArray (line 603) | func convertBooleanToByteArray(v Value) (Value, error) {
function convertBooleanToFixedLenByteArray (line 607) | func convertBooleanToFixedLenByteArray(v Value, size int) (Value, error) {
function convertBooleanToString (line 614) | func convertBooleanToString(v Value) (Value, error) {
function convertInt32ToBoolean (line 624) | func convertInt32ToBoolean(v Value) (Value, error) {
function convertInt32ToInt64 (line 628) | func convertInt32ToInt64(v Value) (Value, error) {
function convertInt32ToInt96 (line 632) | func convertInt32ToInt96(v Value) (Value, error) {
function convertInt32ToFloat (line 636) | func convertInt32ToFloat(v Value) (Value, error) {
function convertInt32ToDouble (line 640) | func convertInt32ToDouble(v Value) (Value, error) {
function convertInt32ToByteArray (line 644) | func convertInt32ToByteArray(v Value) (Value, error) {
function convertInt32ToFixedLenByteArray (line 650) | func convertInt32ToFixedLenByteArray(v Value, size int) (Value, error) {
function convertInt32ToString (line 658) | func convertInt32ToString(v Value) (Value, error) {
function convertInt64ToBoolean (line 662) | func convertInt64ToBoolean(v Value) (Value, error) {
function convertInt64ToInt32 (line 666) | func convertInt64ToInt32(v Value) (Value, error) {
function convertInt64ToInt96 (line 670) | func convertInt64ToInt96(v Value) (Value, error) {
function convertInt64ToFloat (line 674) | func convertInt64ToFloat(v Value) (Value, error) {
function convertInt64ToDouble (line 678) | func convertInt64ToDouble(v Value) (Value, error) {
function convertInt64ToByteArray (line 682) | func convertInt64ToByteArray(v Value) (Value, error) {
function convertInt64ToFixedLenByteArray (line 688) | func convertInt64ToFixedLenByteArray(v Value, size int) (Value, error) {
function convertInt64ToString (line 696) | func convertInt64ToString(v Value) (Value, error) {
function convertInt96ToBoolean (line 700) | func convertInt96ToBoolean(v Value) (Value, error) {
function convertInt96ToInt32 (line 704) | func convertInt96ToInt32(v Value) (Value, error) {
function convertInt96ToInt64 (line 708) | func convertInt96ToInt64(v Value) (Value, error) {
function convertInt96ToFloat (line 712) | func convertInt96ToFloat(v Value) (Value, error) {
function convertInt96ToDouble (line 716) | func convertInt96ToDouble(v Value) (Value, error) {
function convertInt96ToByteArray (line 720) | func convertInt96ToByteArray(v Value) (Value, error) {
function convertInt96ToFixedLenByteArray (line 724) | func convertInt96ToFixedLenByteArray(v Value, size int) (Value, error) {
function convertInt96ToString (line 736) | func convertInt96ToString(v Value) (Value, error) {
function convertFloatToBoolean (line 740) | func convertFloatToBoolean(v Value) (Value, error) {
function convertFloatToInt32 (line 744) | func convertFloatToInt32(v Value) (Value, error) {
function convertFloatToInt64 (line 748) | func convertFloatToInt64(v Value) (Value, error) {
function convertFloatToInt96 (line 752) | func convertFloatToInt96(v Value) (Value, error) {
function convertFloatToDouble (line 756) | func convertFloatToDouble(v Value) (Value, error) {
function convertFloatToByteArray (line 760) | func convertFloatToByteArray(v Value) (Value, error) {
function convertFloatToFixedLenByteArray (line 766) | func convertFloatToFixedLenByteArray(v Value, size int) (Value, error) {
function convertFloatToString (line 774) | func convertFloatToString(v Value) (Value, error) {
function convertDoubleToBoolean (line 778) | func convertDoubleToBoolean(v Value) (Value, error) {
function convertDoubleToInt32 (line 782) | func convertDoubleToInt32(v Value) (Value, error) {
function convertDoubleToInt64 (line 786) | func convertDoubleToInt64(v Value) (Value, error) {
function convertDoubleToInt96 (line 790) | func convertDoubleToInt96(v Value) (Value, error) {
function convertDoubleToFloat (line 794) | func convertDoubleToFloat(v Value) (Value, error) {
function convertDoubleToByteArray (line 798) | func convertDoubleToByteArray(v Value) (Value, error) {
function convertDoubleToFixedLenByteArray (line 804) | func convertDoubleToFixedLenByteArray(v Value, size int) (Value, error) {
function convertDoubleToString (line 812) | func convertDoubleToString(v Value) (Value, error) {
function convertByteArrayToBoolean (line 816) | func convertByteArrayToBoolean(v Value) (Value, error) {
function convertByteArrayToInt32 (line 820) | func convertByteArrayToInt32(v Value) (Value, error) {
function convertByteArrayToInt64 (line 826) | func convertByteArrayToInt64(v Value) (Value, error) {
function convertByteArrayToInt96 (line 832) | func convertByteArrayToInt96(v Value) (Value, error) {
function convertByteArrayToFloat (line 842) | func convertByteArrayToFloat(v Value) (Value, error) {
function convertByteArrayToDouble (line 848) | func convertByteArrayToDouble(v Value) (Value, error) {
function convertByteArrayToFixedLenByteArray (line 854) | func convertByteArrayToFixedLenByteArray(v Value, size int) (Value, erro...
function convertFixedLenByteArrayToString (line 866) | func convertFixedLenByteArrayToString(v Value) (Value, error) {
function convertStringToBoolean (line 873) | func convertStringToBoolean(v Value) (Value, error) {
function convertStringToInt32 (line 881) | func convertStringToInt32(v Value) (Value, error) {
function convertStringToInt64 (line 889) | func convertStringToInt64(v Value) (Value, error) {
function convertStringToInt96 (line 897) | func convertStringToInt96(v Value) (Value, error) {
function convertStringToFloat (line 909) | func convertStringToFloat(v Value) (Value, error) {
function convertStringToDouble (line 917) | func convertStringToDouble(v Value) (Value, error) {
function convertStringToFixedLenByteArray (line 925) | func convertStringToFixedLenByteArray(v Value, size int) (Value, error) {
function convertStringToDate (line 935) | func convertStringToDate(v Value, tz *time.Location) (Value, error) {
function convertStringToTimeMillis (line 944) | func convertStringToTimeMillis(v Value, tz *time.Location) (Value, error) {
function convertStringToTimeMicros (line 954) | func convertStringToTimeMicros(v Value, tz *time.Location) (Value, error) {
function convertDateToTimestamp (line 964) | func convertDateToTimestamp(v Value, u format.TimeUnit, tz *time.Locatio...
function convertDateToString (line 970) | func convertDateToString(v Value) (Value, error) {
function convertTimeMillisToString (line 976) | func convertTimeMillisToString(v Value, tz *time.Location) (Value, error) {
function convertTimeMicrosToString (line 982) | func convertTimeMicrosToString(v Value, tz *time.Location) (Value, error) {
function convertTimestampToDate (line 988) | func convertTimestampToDate(v Value, u format.TimeUnit, tz *time.Locatio...
function convertTimestampToTimeMillis (line 994) | func convertTimestampToTimeMillis(v Value, u format.TimeUnit, sourceZone...
function convertTimestampToTimeMicros (line 1001) | func convertTimestampToTimeMicros(v Value, u format.TimeUnit, sourceZone...
function convertTimestampToTimestamp (line 1008) | func convertTimestampToTimestamp(v Value, sourceUnit, targetUnit format....
constant nanosecondsPerDay (line 1015) | nanosecondsPerDay = 24 * 60 * 60 * 1e9
function daysSinceUnixEpoch (line 1017) | func daysSinceUnixEpoch(t time.Time) int {
function nearestMidnightLessThan (line 1021) | func nearestMidnightLessThan(t time.Time) time.Time {
function timestamp (line 1026) | func timestamp(v Value, u format.TimeUnit, tz *time.Location) time.Time {
function timeUnitDuration (line 1030) | func timeUnitDuration(unit format.TimeUnit) time.Duration {
function invalidConversion (line 1041) | func invalidConversion(value Value, from, to string) error {
function conversionError (line 1045) | func conversionError(value Value, from, to string, err error) error {
FILE: convert_test.go
type AddressBook1 (line 12) | type AddressBook1 struct
type AddressBook2 (line 17) | type AddressBook2 struct
type AddressBook3 (line 24) | type AddressBook3 struct
type Contact2 (line 29) | type Contact2 struct
type AddressBook4 (line 35) | type AddressBook4 struct
type SimpleNumber (line 41) | type SimpleNumber struct
type SimpleContact (line 45) | type SimpleContact struct
type SimpleAddressBook (line 49) | type SimpleAddressBook struct
type SimpleAddressBook2 (line 54) | type SimpleAddressBook2 struct
type ListOfIDs (line 60) | type ListOfIDs struct
function TestConvert (line 447) | func TestConvert(t *testing.T) {
function newInt64 (line 482) | func newInt64(i int64) *int64 { return &i }
function newString (line 483) | func newString(s string) *string { return &s }
function TestConvertValue (line 485) | func TestConvertValue(t *testing.T) {
FILE: dedupe.go
function DedupeRowReader (line 8) | func DedupeRowReader(reader RowReader, compare func(Row, Row) int) RowRe...
type dedupeRowReader (line 12) | type dedupeRowReader struct
method ReadRows (line 18) | func (d *dedupeRowReader) ReadRows(rows []Row) (int, error) {
function DedupeRowWriter (line 34) | func DedupeRowWriter(writer RowWriter, compare func(Row, Row) int) RowWr...
type dedupeRowWriter (line 38) | type dedupeRowWriter struct
method WriteRows (line 45) | func (d *dedupeRowWriter) WriteRows(rows []Row) (int, error) {
type dedupe (line 68) | type dedupe struct
method reset (line 75) | func (d *dedupe) reset() {
method deduplicate (line 80) | func (d *dedupe) deduplicate(rows []Row, compare func(Row, Row) int) i...
FILE: dedupe_test.go
function TestDedupeRowReader (line 12) | func TestDedupeRowReader(t *testing.T) {
function TestDedupeRowWriter (line 60) | func TestDedupeRowWriter(t *testing.T) {
FILE: deprecated/int96.go
type Int96 (line 10) | type Int96
method IsZero (line 33) | func (i Int96) IsZero() bool { return i == Int96{} }
method Negative (line 36) | func (i Int96) Negative() bool {
method Less (line 43) | func (i Int96) Less(j Int96) bool {
method Int (line 66) | func (i Int96) Int() *big.Int {
method Int32 (line 75) | func (i Int96) Int32() int32 {
method Int64 (line 80) | func (i Int96) Int64() int64 {
method String (line 85) | func (i Int96) String() string {
method Len (line 90) | func (i Int96) Len() int {
function Int32ToInt96 (line 13) | func Int32ToInt96(value int32) (i96 Int96) {
function Int64ToInt96 (line 23) | func Int64ToInt96(value int64) (i96 Int96) {
function Int96ToBytes (line 103) | func Int96ToBytes(data []Int96) []byte {
function BytesToInt96 (line 112) | func BytesToInt96(data []byte) []Int96 {
function MaxLenInt96 (line 116) | func MaxLenInt96(data []Int96) int {
function MinInt96 (line 127) | func MinInt96(data []Int96) (min Int96) {
function MaxInt96 (line 139) | func MaxInt96(data []Int96) (max Int96) {
function MinMaxInt96 (line 151) | func MinMaxInt96(data []Int96) (min, max Int96) {
function OrderOfInt96 (line 167) | func OrderOfInt96(data []Int96) int {
function int96AreInAscendingOrder (line 179) | func int96AreInAscendingOrder(data []Int96) bool {
function int96AreInDescendingOrder (line 188) | func int96AreInDescendingOrder(data []Int96) bool {
FILE: deprecated/int96_test.go
function TestInt96Less (line 10) | func TestInt96Less(t *testing.T) {
function TestMaxLenInt96 (line 91) | func TestMaxLenInt96(t *testing.T) {
FILE: deprecated/parquet.go
type ConvertedType (line 7) | type ConvertedType
constant UTF8 (line 11) | UTF8 ConvertedType = 0
constant Map (line 14) | Map ConvertedType = 1
constant MapKeyValue (line 17) | MapKeyValue ConvertedType = 2
constant List (line 21) | List ConvertedType = 3
constant Enum (line 24) | Enum ConvertedType = 4
constant Decimal (line 38) | Decimal ConvertedType = 5
constant Date (line 43) | Date ConvertedType = 6
constant TimeMillis (line 49) | TimeMillis ConvertedType = 7
constant TimeMicros (line 55) | TimeMicros ConvertedType = 8
constant TimestampMillis (line 61) | TimestampMillis ConvertedType = 9
constant TimestampMicros (line 67) | TimestampMicros ConvertedType = 10
constant Uint8 (line 75) | Uint8 ConvertedType = 11
constant Uint16 (line 76) | Uint16 ConvertedType = 12
constant Uint32 (line 77) | Uint32 ConvertedType = 13
constant Uint64 (line 78) | Uint64 ConvertedType = 14
constant Int8 (line 86) | Int8 ConvertedType = 15
constant Int16 (line 87) | Int16 ConvertedType = 16
constant Int32 (line 88) | Int32 ConvertedType = 17
constant Int64 (line 89) | Int64 ConvertedType = 18
constant Json (line 94) | Json ConvertedType = 19
constant Bson (line 99) | Bson ConvertedType = 20
constant Interval (line 111) | Interval ConvertedType = 21
FILE: dictionary.go
constant hashprobeTableMaxLoad (line 26) | hashprobeTableMaxLoad = 0.85
constant insertsTargetCacheFootprint (line 33) | insertsTargetCacheFootprint = 8192
type Dictionary (line 47) | type Dictionary interface
function checkLookupIndexBounds (line 89) | func checkLookupIndexBounds(indexes []int32, rows sparse.Array) {
type booleanDictionary (line 96) | type booleanDictionary struct
method Type (line 134) | func (d *booleanDictionary) Type() Type { return newIndexedType(d.typ,...
method Len (line 136) | func (d *booleanDictionary) Len() int { return int(d.numValues) }
method Index (line 138) | func (d *booleanDictionary) Index(i int32) Value { return d.makeValue(...
method index (line 140) | func (d *booleanDictionary) index(i int32) bool { return d.valueAt(int...
method Insert (line 142) | func (d *booleanDictionary) Insert(indexes []int32, values []Value) {
method insert (line 147) | func (d *booleanDictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 171) | func (d *booleanDictionary) Lookup(indexes []int32, values []Value) {
method lookup (line 177) | func (d *booleanDictionary) lookup(indexes []int32, rows sparse.Array) {
method Bounds (line 184) | func (d *booleanDictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 206) | func (d *booleanDictionary) Reset() {
method Page (line 213) | func (d *booleanDictionary) Page() Page {
function newBooleanDictionary (line 107) | func newBooleanDictionary(typ Type, columnIndex int16, numValues int32, ...
type int32Dictionary (line 217) | type int32Dictionary struct
method Type (line 232) | func (d *int32Dictionary) Type() Type { return newIndexedType(d.typ, d) }
method Len (line 234) | func (d *int32Dictionary) Len() int { return len(d.values) }
method Index (line 236) | func (d *int32Dictionary) Index(i int32) Value { return d.makeValue(d....
method index (line 238) | func (d *int32Dictionary) index(i int32) int32 { return d.values[i] }
method Insert (line 240) | func (d *int32Dictionary) Insert(indexes []int32, values []Value) {
method init (line 245) | func (d *int32Dictionary) init(indexes []int32) {
method insert (line 256) | func (d *int32Dictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 291) | func (d *int32Dictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 297) | func (d *int32Dictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 306) | func (d *int32Dictionary) Reset() {
method Page (line 313) | func (d *int32Dictionary) Page() Page {
function newInt32Dictionary (line 222) | func newInt32Dictionary(typ Type, columnIndex int16, numValues int32, da...
type int64Dictionary (line 317) | type int64Dictionary struct
method Type (line 332) | func (d *int64Dictionary) Type() Type { return newIndexedType(d.typ, d) }
method Len (line 334) | func (d *int64Dictionary) Len() int { return len(d.values) }
method Index (line 336) | func (d *int64Dictionary) Index(i int32) Value { return d.makeValue(d....
method index (line 338) | func (d *int64Dictionary) index(i int32) int64 { return d.values[i] }
method Insert (line 340) | func (d *int64Dictionary) Insert(indexes []int32, values []Value) {
method init (line 345) | func (d *int64Dictionary) init(indexes []int32) {
method insert (line 356) | func (d *int64Dictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 378) | func (d *int64Dictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 384) | func (d *int64Dictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 393) | func (d *int64Dictionary) Reset() {
method Page (line 400) | func (d *int64Dictionary) Page() Page {
function newInt64Dictionary (line 322) | func newInt64Dictionary(typ Type, columnIndex int16, numValues int32, da...
type int96Dictionary (line 404) | type int96Dictionary struct
method Type (line 419) | func (d *int96Dictionary) Type() Type { return newIndexedType(d.typ, d) }
method Len (line 421) | func (d *int96Dictionary) Len() int { return len(d.values) }
method Index (line 423) | func (d *int96Dictionary) Index(i int32) Value { return d.makeValue(d....
method index (line 425) | func (d *int96Dictionary) index(i int32) deprecated.Int96 { return d.v...
method Insert (line 427) | func (d *int96Dictionary) Insert(indexes []int32, values []Value) {
method insert (line 433) | func (d *int96Dictionary) insert(indexes []int32, rows sparse.Array) {
method insertValues (line 439) | func (d *int96Dictionary) insertValues(indexes []int32, count int, val...
method Lookup (line 463) | func (d *int96Dictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 469) | func (d *int96Dictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 490) | func (d *int96Dictionary) Reset() {
method Page (line 495) | func (d *int96Dictionary) Page() Page {
function newInt96Dictionary (line 409) | func newInt96Dictionary(typ Type, columnIndex int16, numValues int32, da...
type floatDictionary (line 499) | type floatDictionary struct
method Type (line 514) | func (d *floatDictionary) Type() Type { return newIndexedType(d.typ, d) }
method Len (line 516) | func (d *floatDictionary) Len() int { return len(d.values) }
method Index (line 518) | func (d *floatDictionary) Index(i int32) Value { return d.makeValue(d....
method index (line 520) | func (d *floatDictionary) index(i int32) float32 { return d.values[i] }
method Insert (line 522) | func (d *floatDictionary) Insert(indexes []int32, values []Value) {
method init (line 527) | func (d *floatDictionary) init(indexes []int32) {
method insert (line 538) | func (d *floatDictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 560) | func (d *floatDictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 566) | func (d *floatDictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 575) | func (d *floatDictionary) Reset() {
method Page (line 582) | func (d *floatDictionary) Page() Page {
function newFloatDictionary (line 504) | func newFloatDictionary(typ Type, columnIndex int16, numValues int32, da...
type doubleDictionary (line 586) | type doubleDictionary struct
method Type (line 601) | func (d *doubleDictionary) Type() Type { return newIndexedType(d.typ, ...
method Len (line 603) | func (d *doubleDictionary) Len() int { return len(d.values) }
method Index (line 605) | func (d *doubleDictionary) Index(i int32) Value { return d.makeValue(d...
method index (line 607) | func (d *doubleDictionary) index(i int32) float64 { return d.values[i] }
method Insert (line 609) | func (d *doubleDictionary) Insert(indexes []int32, values []Value) {
method init (line 614) | func (d *doubleDictionary) init(indexes []int32) {
method insert (line 625) | func (d *doubleDictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 647) | func (d *doubleDictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 653) | func (d *doubleDictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 662) | func (d *doubleDictionary) Reset() {
method Page (line 669) | func (d *doubleDictionary) Page() Page {
function newDoubleDictionary (line 591) | func newDoubleDictionary(typ Type, columnIndex int16, numValues int32, d...
type byteArrayDictionary (line 673) | type byteArrayDictionary struct
method Type (line 702) | func (d *byteArrayDictionary) Type() Type { return newIndexedType(d.ty...
method Len (line 704) | func (d *byteArrayDictionary) Len() int { return d.len() }
method Index (line 706) | func (d *byteArrayDictionary) Index(i int32) Value { return d.makeValu...
method Insert (line 708) | func (d *byteArrayDictionary) Insert(indexes []int32, values []Value) {
method init (line 713) | func (d *byteArrayDictionary) init() {
method insert (line 722) | func (d *byteArrayDictionary) insert(indexes []int32, rows sparse.Arra...
method Lookup (line 745) | func (d *byteArrayDictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 751) | func (d *byteArrayDictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 782) | func (d *byteArrayDictionary) Reset() {
method Page (line 791) | func (d *byteArrayDictionary) Page() Page {
function newByteArrayDictionary (line 679) | func newByteArrayDictionary(typ Type, columnIndex int16, numValues int32...
type fixedLenByteArrayDictionary (line 795) | type fixedLenByteArrayDictionary struct
method Type (line 812) | func (d *fixedLenByteArrayDictionary) Type() Type { return newIndexedT...
method Len (line 814) | func (d *fixedLenByteArrayDictionary) Len() int { return len(d.data) /...
method Index (line 816) | func (d *fixedLenByteArrayDictionary) Index(i int32) Value {
method index (line 820) | func (d *fixedLenByteArrayDictionary) index(i int32) []byte {
method Insert (line 826) | func (d *fixedLenByteArrayDictionary) Insert(indexes []int32, values [...
method insert (line 832) | func (d *fixedLenByteArrayDictionary) insert(indexes []int32, rows spa...
method insertValues (line 838) | func (d *fixedLenByteArrayDictionary) insertValues(indexes []int32, co...
method Lookup (line 864) | func (d *fixedLenByteArrayDictionary) Lookup(indexes []int32, values [...
method Bounds (line 870) | func (d *fixedLenByteArrayDictionary) Bounds(indexes []int32) (min, ma...
method Reset (line 901) | func (d *fixedLenByteArrayDictionary) Reset() {
method Page (line 906) | func (d *fixedLenByteArrayDictionary) Page() Page {
function newFixedLenByteArrayDictionary (line 800) | func newFixedLenByteArrayDictionary(typ Type, columnIndex int16, numValu...
type uint32Dictionary (line 910) | type uint32Dictionary struct
method Type (line 925) | func (d *uint32Dictionary) Type() Type { return newIndexedType(d.typ, ...
method Len (line 927) | func (d *uint32Dictionary) Len() int { return len(d.values) }
method Index (line 929) | func (d *uint32Dictionary) Index(i int32) Value { return d.makeValue(d...
method index (line 931) | func (d *uint32Dictionary) index(i int32) uint32 { return d.values[i] }
method Insert (line 933) | func (d *uint32Dictionary) Insert(indexes []int32, values []Value) {
method init (line 938) | func (d *uint32Dictionary) init(indexes []int32) {
method insert (line 949) | func (d *uint32Dictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 971) | func (d *uint32Dictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 977) | func (d *uint32Dictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 986) | func (d *uint32Dictionary) Reset() {
method Page (line 993) | func (d *uint32Dictionary) Page() Page {
function newUint32Dictionary (line 915) | func newUint32Dictionary(typ Type, columnIndex int16, numValues int32, d...
type uint64Dictionary (line 997) | type uint64Dictionary struct
method Type (line 1012) | func (d *uint64Dictionary) Type() Type { return newIndexedType(d.typ, ...
method Len (line 1014) | func (d *uint64Dictionary) Len() int { return len(d.values) }
method Index (line 1016) | func (d *uint64Dictionary) Index(i int32) Value { return d.makeValue(d...
method index (line 1018) | func (d *uint64Dictionary) index(i int32) uint64 { return d.values[i] }
method Insert (line 1020) | func (d *uint64Dictionary) Insert(indexes []int32, values []Value) {
method init (line 1025) | func (d *uint64Dictionary) init(indexes []int32) {
method insert (line 1036) | func (d *uint64Dictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 1058) | func (d *uint64Dictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 1064) | func (d *uint64Dictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 1073) | func (d *uint64Dictionary) Reset() {
method Page (line 1080) | func (d *uint64Dictionary) Page() Page {
function newUint64Dictionary (line 1002) | func newUint64Dictionary(typ Type, columnIndex int16, numValues int32, d...
type be128Dictionary (line 1084) | type be128Dictionary struct
method Type (line 1099) | func (d *be128Dictionary) Type() Type { return newIndexedType(d.typ, d) }
method Len (line 1101) | func (d *be128Dictionary) Len() int { return len(d.values) }
method Index (line 1103) | func (d *be128Dictionary) Index(i int32) Value { return d.makeValue(d....
method index (line 1105) | func (d *be128Dictionary) index(i int32) *[16]byte { return &d.values[...
method Insert (line 1107) | func (d *be128Dictionary) Insert(indexes []int32, values []Value) {
method init (line 1143) | func (d *be128Dictionary) init(indexes []int32) {
method insert (line 1154) | func (d *be128Dictionary) insert(indexes []int32, rows sparse.Array) {
method Lookup (line 1176) | func (d *be128Dictionary) Lookup(indexes []int32, values []Value) {
method Bounds (line 1182) | func (d *be128Dictionary) Bounds(indexes []int32) (min, max Value) {
method Reset (line 1191) | func (d *be128Dictionary) Reset() {
method Page (line 1198) | func (d *be128Dictionary) Page() Page {
function newBE128Dictionary (line 1089) | func newBE128Dictionary(typ Type, columnIndex int16, numValues int32, da...
type indexedType (line 1205) | type indexedType struct
method NewColumnBuffer (line 1214) | func (t *indexedType) NewColumnBuffer(columnIndex, numValues int) Colu...
method NewPage (line 1218) | func (t *indexedType) NewPage(columnIndex, numValues int, data encodin...
function newIndexedType (line 1210) | func newIndexedType(typ Type, dict Dictionary) *indexedType {
type indexedPage (line 1225) | type indexedPage struct
method Type (line 1259) | func (page *indexedPage) Type() Type { return indexedPageType{page.typ} }
method Column (line 1261) | func (page *indexedPage) Column() int { return int(^page.columnIndex) }
method Dictionary (line 1263) | func (page *indexedPage) Dictionary() Dictionary { return page.typ.dict }
method NumRows (line 1265) | func (page *indexedPage) NumRows() int64 { return int64(len(page.value...
method NumValues (line 1267) | func (page *indexedPage) NumValues() int64 { return int64(len(page.val...
method NumNulls (line 1269) | func (page *indexedPage) NumNulls() int64 { return 0 }
method Size (line 1271) | func (page *indexedPage) Size() int64 { return 4 * int64(len(page.valu...
method RepetitionLevels (line 1273) | func (page *indexedPage) RepetitionLevels() []byte { return nil }
method DefinitionLevels (line 1275) | func (page *indexedPage) DefinitionLevels() []byte { return nil }
method Data (line 1277) | func (page *indexedPage) Data() encoding.Values { return encoding.Int3...
method Values (line 1279) | func (page *indexedPage) Values() ValueReader { return &indexedPageVal...
method Bounds (line 1281) | func (page *indexedPage) Bounds() (min, max Value, ok bool) {
method Slice (line 1290) | func (page *indexedPage) Slice(i, j int64) Page {
function newIndexedPage (line 1231) | func newIndexedPage(typ *indexedType, columnIndex int16, numValues int32...
type indexedPageType (line 1302) | type indexedPageType struct
method NewValues (line 1304) | func (t indexedPageType) NewValues(values []byte, _ []uint32) encoding...
method Encode (line 1308) | func (t indexedPageType) Encode(dst []byte, src encoding.Values, enc e...
method Decode (line 1312) | func (t indexedPageType) Decode(dst encoding.Values, src []byte, enc e...
method EstimateDecodeSize (line 1316) | func (t indexedPageType) EstimateDecodeSize(numValues int, src []byte,...
type indexedPageValues (line 1320) | type indexedPageValues struct
method ReadValues (line 1325) | func (r *indexedPageValues) ReadValues(values []Value) (n int, err err...
type indexedColumnBuffer (line 1342) | type indexedColumnBuffer struct
method Clone (line 1354) | func (col *indexedColumnBuffer) Clone() ColumnBuffer {
method Type (line 1364) | func (col *indexedColumnBuffer) Type() Type { return col.typ.Type }
method ColumnIndex (line 1366) | func (col *indexedColumnBuffer) ColumnIndex() ColumnIndex { return ind...
method OffsetIndex (line 1368) | func (col *indexedColumnBuffer) OffsetIndex() OffsetIndex { return ind...
method BloomFilter (line 1370) | func (col *indexedColumnBuffer) BloomFilter() BloomFilter { return nil }
method Dictionary (line 1372) | func (col *indexedColumnBuffer) Dictionary() Dictionary { return col.t...
method Pages (line 1374) | func (col *indexedColumnBuffer) Pages() Pages { return onePage(col.Pag...
method Page (line 1376) | func (col *indexedColumnBuffer) Page() Page { return &col.indexedPage }
method Reset (line 1378) | func (col *indexedColumnBuffer) Reset() { col.values = col.values[:0] }
method Cap (line 1380) | func (col *indexedColumnBuffer) Cap() int { return cap(col.values) }
method Len (line 1382) | func (col *indexedColumnBuffer) Len() int { return len(col.values) }
method Less (line 1384) | func (col *indexedColumnBuffer) Less(i, j int) bool {
method Swap (line 1390) | func (col *indexedColumnBuffer) Swap(i, j int) {
method WriteValues (line 1394) | func (col *indexedColumnBuffer) WriteValues(values []Value) (int, erro...
method writeValues (line 1410) | func (col *indexedColumnBuffer) writeValues(rows sparse.Array, _ colum...
method ReadValuesAt (line 1425) | func (col *indexedColumnBuffer) ReadValuesAt(values []Value, offset in...
method ReadRowAt (line 1446) | func (col *indexedColumnBuffer) ReadRowAt(row Row, index int64) (Row, ...
function newIndexedColumnBuffer (line 1344) | func newIndexedColumnBuffer(typ *indexedType, columnIndex int16, numValu...
type indexedColumnIndex (line 1459) | type indexedColumnIndex struct
method NumPages (line 1461) | func (index indexedColumnIndex) NumPages() int { return 1 }
method NullCount (line 1462) | func (index indexedColumnIndex) NullCount(int) int64 { return 0 }
method NullPage (line 1463) | func (index indexedColumnIndex) NullPage(int) bool { return false }
method MinValue (line 1464) | func (index indexedColumnIndex) MinValue(int) Value {
method MaxValue (line 1468) | func (index indexedColumnIndex) MaxValue(int) Value {
method IsAscending (line 1472) | func (index indexedColumnIndex) IsAscending() bool {
method IsDescending (line 1476) | func (index indexedColumnIndex) IsDescending() bool {
type indexedOffsetIndex (line 1481) | type indexedOffsetIndex struct
method NumPages (line 1483) | func (index indexedOffsetIndex) NumPages() int { return...
method Offset (line 1484) | func (index indexedOffsetIndex) Offset(int) int64 { return...
method CompressedPageSize (line 1485) | func (index indexedOffsetIndex) CompressedPageSize(int) int64 { return...
method FirstRowIndex (line 1486) | func (index indexedOffsetIndex) FirstRowIndex(int) int64 { return...
FILE: dictionary_amd64.go
function dictionaryBoundsInt32 (line 13) | func dictionaryBoundsInt32(dict []int32, indexes []int32) (min, max int3...
function dictionaryBoundsInt64 (line 16) | func dictionaryBoundsInt64(dict []int64, indexes []int32) (min, max int6...
function dictionaryBoundsFloat32 (line 19) | func dictionaryBoundsFloat32(dict []float32, indexes []int32) (min, max ...
function dictionaryBoundsFloat64 (line 22) | func dictionaryBoundsFloat64(dict []float64, indexes []int32) (min, max ...
function dictionaryBoundsUint32 (line 25) | func dictionaryBoundsUint32(dict []uint32, indexes []int32) (min, max ui...
function dictionaryBoundsUint64 (line 28) | func dictionaryBoundsUint64(dict []uint64, indexes []int32) (min, max ui...
function dictionaryBoundsBE128 (line 31) | func dictionaryBoundsBE128(dict [][16]byte, indexes []int32) (min, max *...
function dictionaryLookup32 (line 34) | func dictionaryLookup32(dict []uint32, indexes []int32, rows sparse.Arra...
function dictionaryLookup64 (line 37) | func dictionaryLookup64(dict []uint64, indexes []int32, rows sparse.Arra...
function dictionaryLookupByteArrayString (line 40) | func dictionaryLookupByteArrayString(dict []uint32, page []byte, indexes...
function dictionaryLookupFixedLenByteArrayString (line 43) | func dictionaryLookupFixedLenByteArrayString(dict []byte, len int, index...
function dictionaryLookupFixedLenByteArrayPointer (line 46) | func dictionaryLookupFixedLenByteArrayPointer(dict []byte, len int, inde...
method lookup (line 48) | func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 54) | func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 60) | func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 66) | func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) {
method lookupString (line 72) | func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse....
method lookupString (line 91) | func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows...
method lookup (line 100) | func (d *uint32Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 105) | func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookupString (line 110) | func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Arra...
method lookupPointer (line 121) | func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Arr...
method bounds (line 130) | func (d *int32Dictionary) bounds(indexes []int32) (min, max int32) {
method bounds (line 136) | func (d *int64Dictionary) bounds(indexes []int32) (min, max int64) {
method bounds (line 142) | func (d *floatDictionary) bounds(indexes []int32) (min, max float32) {
method bounds (line 148) | func (d *doubleDictionary) bounds(indexes []int32) (min, max float64) {
method bounds (line 154) | func (d *uint32Dictionary) bounds(indexes []int32) (min, max uint32) {
method bounds (line 160) | func (d *uint64Dictionary) bounds(indexes []int32) (min, max uint64) {
method bounds (line 166) | func (d *be128Dictionary) bounds(indexes []int32) (min, max *[16]byte) {
FILE: dictionary_purego.go
method lookup (line 11) | func (d *int32Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 18) | func (d *int64Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 25) | func (d *floatDictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 32) | func (d *doubleDictionary) lookup(indexes []int32, rows sparse.Array) {
method lookupString (line 39) | func (d *byteArrayDictionary) lookupString(indexes []int32, rows sparse....
method lookupString (line 47) | func (d *fixedLenByteArrayDictionary) lookupString(indexes []int32, rows...
method lookup (line 55) | func (d *uint32Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookup (line 62) | func (d *uint64Dictionary) lookup(indexes []int32, rows sparse.Array) {
method lookupString (line 69) | func (d *be128Dictionary) lookupString(indexes []int32, rows sparse.Arra...
method lookupPointer (line 78) | func (d *be128Dictionary) lookupPointer(indexes []int32, rows sparse.Arr...
method bounds (line 85) | func (d *int32Dictionary) bounds(indexes []int32) (min, max int32) {
method bounds (line 102) | func (d *int64Dictionary) bounds(indexes []int32) (min, max int64) {
method bounds (line 119) | func (d *floatDictionary) bounds(indexes []int32) (min, max float32) {
method bounds (line 136) | func (d *doubleDictionary) bounds(indexes []int32) (min, max float64) {
method bounds (line 153) | func (d *uint32Dictionary) bounds(indexes []int32) (min, max uint32) {
method bounds (line 170) | func (d *uint64Dictionary) bounds(indexes []int32) (min, max uint64) {
method bounds (line 187) | func (d *be128Dictionary) bounds(indexes []int32) (min, max *[16]byte) {
FILE: dictionary_test.go
function TestDictionary (line 27) | func TestDictionary(t *testing.T) {
function testDictionary (line 37) | func testDictionary(t *testing.T, typ parquet.Type, numValues int) {
function BenchmarkDictionary (line 141) | func BenchmarkDictionary(b *testing.B) {
function TestIssue312 (line 203) | func TestIssue312(t *testing.T) {
FILE: encoding.go
function isDictionaryEncoding (line 86) | func isDictionaryEncoding(encoding encoding.Encoding) bool {
function isDictionaryFormat (line 90) | func isDictionaryFormat(encoding format.Encoding) bool {
function LookupEncoding (line 98) | func LookupEncoding(enc format.Encoding) encoding.Encoding {
function lookupLevelEncoding (line 107) | func lookupLevelEncoding(enc format.Encoding, max byte) encoding.Encoding {
function canEncode (line 119) | func canEncode(e encoding.Encoding, k Kind) bool {
FILE: encoding/bitpacked/bitpacked.go
type Encoding (line 8) | type Encoding struct
method String (line 13) | func (e *Encoding) String() string {
method Encoding (line 17) | func (e *Encoding) Encoding() format.Encoding {
method EncodeLevels (line 21) | func (e *Encoding) EncodeLevels(dst []byte, src []uint8) ([]byte, erro...
method DecodeLevels (line 26) | func (e *Encoding) DecodeLevels(dst []uint8, src []byte) ([]uint8, err...
method wrap (line 31) | func (e *Encoding) wrap(err error) error {
function encodeLevels (line 38) | func encodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) {
function decodeLevels (line 71) | func decodeLevels(dst, src []byte, bitWidth uint) ([]byte, error) {
function bitFlip (line 110) | func bitFlip(b byte) byte {
FILE: encoding/bitpacked/bitpacked_test.go
function FuzzEncodeLevels (line 13) | func FuzzEncodeLevels(f *testing.F) {
FILE: encoding/bytestreamsplit/bytestreamsplit.go
type Encoding (line 11) | type Encoding struct
method String (line 15) | func (e *Encoding) String() string {
method Encoding (line 19) | func (e *Encoding) Encoding() format.Encoding {
method EncodeFloat (line 23) | func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, err...
method EncodeDouble (line 29) | func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, er...
method DecodeFloat (line 35) | func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, ...
method DecodeDouble (line 44) | func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64,...
function resize (line 53) | func resize(buf []byte, size int) []byte {
FILE: encoding/bytestreamsplit/bytestreamsplit_amd64.go
function encodeFloat (line 26) | func encodeFloat(dst, src []byte)
function encodeDouble (line 29) | func encodeDouble(dst, src []byte)
function decodeFloat (line 32) | func decodeFloat(dst, src []byte)
function decodeDouble (line 35) | func decodeDouble(dst, src []byte)
FILE: encoding/bytestreamsplit/bytestreamsplit_purego.go
function encodeFloat (line 7) | func encodeFloat(dst, src []byte) {
function encodeDouble (line 22) | func encodeDouble(dst, src []byte) {
function decodeFloat (line 45) | func decodeFloat(dst, src []byte) {
function decodeDouble (line 61) | func decodeDouble(dst, src []byte) {
FILE: encoding/bytestreamsplit/bytestreamsplit_test.go
function FuzzEncodeFloat (line 14) | func FuzzEncodeFloat(f *testing.F) {
function FuzzEncodeDouble (line 18) | func FuzzEncodeDouble(f *testing.F) {
function TestEncodeFloat (line 22) | func TestEncodeFloat(t *testing.T) {
function TestEncodeDouble (line 26) | func TestEncodeDouble(t *testing.T) {
FILE: encoding/delta/binary_packed.go
type BinaryPackedEncoding (line 16) | type BinaryPackedEncoding struct
method String (line 20) | func (e *BinaryPackedEncoding) String() string {
method Encoding (line 24) | func (e *BinaryPackedEncoding) Encoding() format.Encoding {
method EncodeInt32 (line 28) | func (e *BinaryPackedEncoding) EncodeInt32(dst []byte, src []int32) ([...
method EncodeInt64 (line 32) | func (e *BinaryPackedEncoding) EncodeInt64(dst []byte, src []int64) ([...
method DecodeInt32 (line 36) | func (e *BinaryPackedEncoding) DecodeInt32(dst []int32, src []byte) ([...
method DecodeInt64 (line 42) | func (e *BinaryPackedEncoding) DecodeInt64(dst []int64, src []byte) ([...
method wrap (line 48) | func (e *BinaryPackedEncoding) wrap(err error) error {
constant blockSize (line 56) | blockSize = 128
constant numMiniBlocks (line 57) | numMiniBlocks = 4
constant miniBlockSize (line 58) | miniBlockSize = blockSize / numMiniBlocks
constant maxSupportedBlockSize (line 63) | maxSupportedBlockSize = 65536
constant maxHeaderLength32 (line 65) | maxHeaderLength32 = 4 * binary.MaxVarintLen64
constant maxMiniBlockLength32 (line 66) | maxMiniBlockLength32 = binary.MaxVarintLen64 + numMiniBlocks + (4 * bloc...
constant maxHeaderLength64 (line 68) | maxHeaderLength64 = 8 * binary.MaxVarintLen64
constant maxMiniBlockLength64 (line 69) | maxMiniBlockLength64 = binary.MaxVarintLen64 + numMiniBlocks + (8 * bloc...
function encodeInt32Default (line 77) | func encodeInt32Default(dst []byte, src []int32) []byte {
function encodeInt64Default (line 123) | func encodeInt64Default(dst []byte, src []int64) []byte {
function encodeBinaryPackedHeader (line 169) | func encodeBinaryPackedHeader(dst []byte, blockSize, numMiniBlocks, tota...
function encodeBlockHeader (line 177) | func encodeBlockHeader(dst []byte, minDelta int64, bitWidths [numMiniBlo...
function blockClearInt32 (line 183) | func blockClearInt32(block *[blockSize]int32, blockLength int) {
function blockDeltaInt32 (line 192) | func blockDeltaInt32(block *[blockSize]int32, lastValue int32) int32 {
function blockMinInt32 (line 199) | func blockMinInt32(block *[blockSize]int32) int32 {
function blockSubInt32 (line 209) | func blockSubInt32(block *[blockSize]int32, value int32) {
function blockBitWidthsInt32 (line 215) | func blockBitWidthsInt32(bitWidths *[numMiniBlocks]byte, block *[blockSi...
function blockClearInt64 (line 231) | func blockClearInt64(block *[blockSize]int64, blockLength int) {
function blockDeltaInt64 (line 240) | func blockDeltaInt64(block *[blockSize]int64, lastValue int64) int64 {
function blockMinInt64 (line 247) | func blockMinInt64(block *[blockSize]int64) int64 {
function blockSubInt64 (line 257) | func blockSubInt64(block *[blockSize]int64, value int64) {
function blockBitWidthsInt64 (line 263) | func blockBitWidthsInt64(bitWidths *[numMiniBlocks]byte, block *[blockSi...
function decodeInt32 (line 279) | func decodeInt32(dst, src []byte) ([]byte, []byte, error) {
function decodeInt64 (line 346) | func decodeInt64(dst, src []byte) ([]byte, []byte, error) {
function decodeBinaryPackedHeader (line 409) | func decodeBinaryPackedHeader(src []byte) (blockSize, numMiniBlocks, tot...
function decodeBinaryPackedBlock (line 454) | func decodeBinaryPackedBlock(src []byte, numMiniBlocks int) (minDelta in...
function decodeUvarint (line 468) | func decodeUvarint(buf []byte, what string) (u uint64, n int, err error) {
function decodeVarint (line 479) | func decodeVarint(buf []byte, what string) (v int64, n int, err error) {
FILE: encoding/delta/binary_packed_amd64.go
function init (line 10) | func init() {
function blockDeltaInt32AVX2 (line 18) | func blockDeltaInt32AVX2(block *[blockSize]int32, lastValue int32) int32
function blockMinInt32AVX2 (line 21) | func blockMinInt32AVX2(block *[blockSize]int32) int32
function blockSubInt32AVX2 (line 24) | func blockSubInt32AVX2(block *[blockSize]int32, value int32)
function blockBitWidthsInt32AVX2 (line 27) | func blockBitWidthsInt32AVX2(bitWidths *[numMiniBlocks]byte, block *[blo...
function encodeMiniBlockInt32Default (line 30) | func encodeMiniBlockInt32Default(dst *byte, src *[miniBlockSize]int32, b...
function encodeMiniBlockInt32x1bitAVX2 (line 33) | func encodeMiniBlockInt32x1bitAVX2(dst *byte, src *[miniBlockSize]int32)
function encodeMiniBlockInt32x2bitsAVX2 (line 36) | func encodeMiniBlockInt32x2bitsAVX2(dst *byte, src *[miniBlockSize]int32)
function encodeMiniBlockInt32x3to16bitsAVX2 (line 39) | func encodeMiniBlockInt32x3to16bitsAVX2(dst *byte, src *[miniBlockSize]i...
function encodeMiniBlockInt32x32bitsAVX2 (line 42) | func encodeMiniBlockInt32x32bitsAVX2(dst *byte, src *[miniBlockSize]int32)
function encodeMiniBlockInt32 (line 44) | func encodeMiniBlockInt32(dst []byte, src *[miniBlockSize]int32, bitWidt...
function encodeMiniBlockInt32AVX2 (line 48) | func encodeMiniBlockInt32AVX2(dst *byte, src *[miniBlockSize]int32, bitW...
function encodeInt32AVX2 (line 63) | func encodeInt32AVX2(dst []byte, src []int32) []byte {
function blockDeltaInt64AVX2 (line 110) | func blockDeltaInt64AVX2(block *[blockSize]int64, lastValue int64) int64
function blockMinInt64AVX2 (line 113) | func blockMinInt64AVX2(block *[blockSize]int64) int64
function blockSubInt64AVX2 (line 116) | func blockSubInt64AVX2(block *[blockSize]int64, value int64)
function blockBitWidthsInt64AVX2 (line 119) | func blockBitWidthsInt64AVX2(bitWidths *[numMiniBlocks]byte, block *[blo...
function encodeMiniBlockInt64Default (line 122) | func encodeMiniBlockInt64Default(dst *byte, src *[miniBlockSize]int64, b...
function encodeMiniBlockInt64x1bitAVX2 (line 125) | func encodeMiniBlockInt64x1bitAVX2(dst *byte, src *[miniBlockSize]int64)
function encodeMiniBlockInt64x2bitsAVX2 (line 128) | func encodeMiniBlockInt64x2bitsAVX2(dst *byte, src *[miniBlockSize]int64)
function encodeMiniBlockInt64x64bitsAVX2 (line 131) | func encodeMiniBlockInt64x64bitsAVX2(dst *byte, src *[miniBlockSize]int64)
function encodeMiniBlockInt64 (line 133) | func encodeMiniBlockInt64(dst []byte, src *[miniBlockSize]int64, bitWidt...
function encodeMiniBlockInt64AVX2 (line 137) | func encodeMiniBlockInt64AVX2(dst *byte, src *[miniBlockSize]int64, bitW...
function encodeInt64AVX2 (line 150) | func encodeInt64AVX2(dst []byte, src []int64) []byte {
function decodeBlockInt32Default (line 197) | func decodeBlockInt32Default(dst []int32, minDelta, lastValue int32) int32
function decodeBlockInt32AVX2 (line 200) | func decodeBlockInt32AVX2(dst []int32, minDelta, lastValue int32) int32
function decodeBlockInt32 (line 202) | func decodeBlockInt32(dst []int32, minDelta, lastValue int32) int32 {
function decodeMiniBlockInt32Default (line 212) | func decodeMiniBlockInt32Default(dst []int32, src []uint32, bitWidth uint)
function decodeMiniBlockInt32x1to16bitsAVX2 (line 215) | func decodeMiniBlockInt32x1to16bitsAVX2(dst []int32, src []uint32, bitWi...
function decodeMiniBlockInt32x17to26bitsAVX2 (line 218) | func decodeMiniBlockInt32x17to26bitsAVX2(dst []int32, src []uint32, bitW...
function decodeMiniBlockInt32x27to31bitsAVX2 (line 221) | func decodeMiniBlockInt32x27to31bitsAVX2(dst []int32, src []uint32, bitW...
function decodeMiniBlockInt32 (line 223) | func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) {
function decodeBlockInt64Default (line 240) | func decodeBlockInt64Default(dst []int64, minDelta, lastValue int64) int64
function decodeBlockInt64 (line 242) | func decodeBlockInt64(dst []int64, minDelta, lastValue int64) int64 {
function decodeMiniBlockInt64Default (line 247) | func decodeMiniBlockInt64Default(dst []int64, src []uint32, bitWidth uint)
function decodeMiniBlockInt64 (line 249) | func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) {
FILE: encoding/delta/binary_packed_amd64_test.go
function requireAVX2 (line 11) | func requireAVX2(t testing.TB) {
function TestBlockDeltaInt32AVX2 (line 17) | func TestBlockDeltaInt32AVX2(t *testing.T) {
function TestBlockMinInt32AVX2 (line 22) | func TestBlockMinInt32AVX2(t *testing.T) {
function TestBlockSubInt32AVX2 (line 27) | func TestBlockSubInt32AVX2(t *testing.T) {
function TestBlockBitWidthsInt32AVX2 (line 32) | func TestBlockBitWidthsInt32AVX2(t *testing.T) {
function TestEncodeMiniBlockInt32AVX2 (line 37) | func TestEncodeMiniBlockInt32AVX2(t *testing.T) {
function BenchmarkBlockDeltaInt32AVX2 (line 46) | func BenchmarkBlockDeltaInt32AVX2(b *testing.B) {
function BenchmarkBlockMinInt32AVX2 (line 51) | func BenchmarkBlockMinInt32AVX2(b *testing.B) {
function BenchmarkBlockSubInt32AVX2 (line 56) | func BenchmarkBlockSubInt32AVX2(b *testing.B) {
function BenchmarkBlockBitWidthsInt32AVX2 (line 61) | func BenchmarkBlockBitWidthsInt32AVX2(b *testing.B) {
function BenchmarkEncodeMiniBlockInt32AVX2 (line 66) | func BenchmarkEncodeMiniBlockInt32AVX2(b *testing.B) {
function TestBlockDeltaInt64AVX2 (line 75) | func TestBlockDeltaInt64AVX2(t *testing.T) {
function TestBlockMinInt64AVX2 (line 80) | func TestBlockMinInt64AVX2(t *testing.T) {
function TestBlockSubInt64AVX2 (line 85) | func TestBlockSubInt64AVX2(t *testing.T) {
function TestBlockBitWidthsInt64AVX2 (line 90) | func TestBlockBitWidthsInt64AVX2(t *testing.T) {
function TestEncodeMiniBlockInt64AVX2 (line 95) | func TestEncodeMiniBlockInt64AVX2(t *testing.T) {
function BenchmarkBlockDeltaInt64AVX2 (line 104) | func BenchmarkBlockDeltaInt64AVX2(b *testing.B) {
function BenchmarkBlockMinInt64AVX2 (line 109) | func BenchmarkBlockMinInt64AVX2(b *testing.B) {
function BenchmarkBlockSubInt64AVX2 (line 114) | func BenchmarkBlockSubInt64AVX2(b *testing.B) {
function BenchmarkBlockBitWidthsInt64AVX2 (line 119) | func BenchmarkBlockBitWidthsInt64AVX2(b *testing.B) {
function BenchmarkEncodeMiniBlockInt64AVX2 (line 124) | func BenchmarkEncodeMiniBlockInt64AVX2(b *testing.B) {
FILE: encoding/delta/binary_packed_purego.go
function encodeMiniBlockInt32 (line 9) | func encodeMiniBlockInt32(dst []byte, src *[miniBlockSize]int32, bitWidt...
function encodeMiniBlockInt64 (line 30) | func encodeMiniBlockInt64(dst []byte, src *[miniBlockSize]int64, bitWidt...
function decodeBlockInt32 (line 51) | func decodeBlockInt32(block []int32, minDelta, lastValue int32) int32 {
function decodeBlockInt64 (line 60) | func decodeBlockInt64(block []int64, minDelta, lastValue int64) int64 {
function decodeMiniBlockInt32 (line 69) | func decodeMiniBlockInt32(dst []int32, src []uint32, bitWidth uint) {
function decodeMiniBlockInt64 (line 86) | func decodeMiniBlockInt64(dst []int64, src []uint32, bitWidth uint) {
FILE: encoding/delta/binary_packed_test.go
function maxLen32 (line 10) | func maxLen32(miniBlock []int32) (maxLen int) {
function maxLen64 (line 19) | func maxLen64(miniBlock []int64) (maxLen int) {
function TestBlockDeltaInt32 (line 28) | func TestBlockDeltaInt32(t *testing.T) {
function testBlockDeltaInt32 (line 32) | func testBlockDeltaInt32(t *testing.T, f func(*[blockSize]int32, int32) ...
function TestBlockMinInt32 (line 51) | func TestBlockMinInt32(t *testing.T) {
function testBlockMinInt32 (line 55) | func testBlockMinInt32(t *testing.T, f func(*[blockSize]int32) int32) {
function TestBlockSubInt32 (line 66) | func TestBlockSubInt32(t *testing.T) {
function testBlockSubInt32 (line 70) | func testBlockSubInt32(t *testing.T, f func(*[blockSize]int32, int32)) {
function TestBlockBitWidthsInt32 (line 84) | func TestBlockBitWidthsInt32(t *testing.T) {
function testBlockBitWidthsInt32 (line 88) | func testBlockBitWidthsInt32(t *testing.T, f func(*[numMiniBlocks]byte, ...
function TestEncodeMiniBlockInt32 (line 109) | func TestEncodeMiniBlockInt32(t *testing.T) {
function testEncodeMiniBlockInt32 (line 113) | func testEncodeMiniBlockInt32(t *testing.T, f func([]byte, *[miniBlockSi...
function BenchmarkBlockDeltaInt32 (line 145) | func BenchmarkBlockDeltaInt32(b *testing.B) {
function benchmarkBlockDeltaInt32 (line 149) | func benchmarkBlockDeltaInt32(b *testing.B, f func(*[blockSize]int32, in...
function BenchmarkBlockMinInt32 (line 157) | func BenchmarkBlockMinInt32(b *testing.B) {
function benchmarkBlockMinInt32 (line 161) | func benchmarkBlockMinInt32(b *testing.B, f func(*[blockSize]int32) int3...
function BenchmarkBlockSubInt32 (line 169) | func BenchmarkBlockSubInt32(b *testing.B) {
function benchmarkBlockSubInt32 (line 173) | func benchmarkBlockSubInt32(b *testing.B, f func(*[blockSize]int32, int3...
function BenchmarkBlockBitWidthsInt32 (line 181) | func BenchmarkBlockBitWidthsInt32(b *testing.B) {
function benchmarkBlockBitWidthsInt32 (line 185) | func benchmarkBlockBitWidthsInt32(b *testing.B, f func(*[numMiniBlocks]b...
function BenchmarkEncodeMiniBlockInt32 (line 194) | func BenchmarkEncodeMiniBlockInt32(b *testing.B) {
function benchmarkEncodeMiniBlockInt32 (line 198) | func benchmarkEncodeMiniBlockInt32(b *testing.B, f func([]byte, *[miniBl...
function TestBlockDeltaInt64 (line 211) | func TestBlockDeltaInt64(t *testing.T) {
function testBlockDeltaInt64 (line 215) | func testBlockDeltaInt64(t *testing.T, f func(*[blockSize]int64, int64) ...
function TestBlockMinInt64 (line 234) | func TestBlockMinInt64(t *testing.T) {
function testBlockMinInt64 (line 238) | func testBlockMinInt64(t *testing.T, f func(*[blockSize]int64) int64) {
function TestBlockSubInt64 (line 248) | func TestBlockSubInt64(t *testing.T) {
function testBlockSubInt64 (line 252) | func testBlockSubInt64(t *testing.T, f func(*[blockSize]int64, int64)) {
function TestBlockBitWidthsInt64 (line 265) | func TestBlockBitWidthsInt64(t *testing.T) {
function testBlockBitWidthsInt64 (line 269) | func testBlockBitWidthsInt64(t *testing.T, f func(*[numMiniBlocks]byte, ...
function TestEncodeMiniBlockInt64 (line 289) | func TestEncodeMiniBlockInt64(t *testing.T) {
function testEncodeMiniBlockInt64 (line 293) | func testEncodeMiniBlockInt64(t *testing.T, f func([]byte, *[miniBlockSi...
function BenchmarkBlockDeltaInt64 (line 324) | func BenchmarkBlockDeltaInt64(b *testing.B) {
function benchmarkBlockDeltaInt64 (line 328) | func benchmarkBlockDeltaInt64(b *testing.B, f func(*[blockSize]int64, in...
function BenchmarkBlockMinInt64 (line 336) | func BenchmarkBlockMinInt64(b *testing.B) {
function benchmarkBlockMinInt64 (line 340) | func benchmarkBlockMinInt64(b *testing.B, f func(*[blockSize]int64) int6...
function BenchmarkBlockSubInt64 (line 348) | func BenchmarkBlockSubInt64(b *testing.B) {
function benchmarkBlockSubInt64 (line 352) | func benchmarkBlockSubInt64(b *testing.B, f func(*[blockSize]int64, int6...
function BenchmarkBlockBitWidthsInt64 (line 360) | func BenchmarkBlockBitWidthsInt64(b *testing.B) {
function benchmarkBlockBitWidthsInt64 (line 364) | func benchmarkBlockBitWidthsInt64(b *testing.B, f func(*[numMiniBlocks]b...
function BenchmarkEncodeMiniBlockInt64 (line 373) | func BenchmarkEncodeMiniBlockInt64(b *testing.B) {
function benchmarkEncodeMiniBlockInt64 (line 377) | func benchmarkEncodeMiniBlockInt64(b *testing.B, f func([]byte, *[miniBl...
FILE: encoding/delta/byte_array.go
constant maxLinearSearchPrefixLength (line 12) | maxLinearSearchPrefixLength = 64
type ByteArrayEncoding (line 15) | type ByteArrayEncoding struct
method String (line 19) | func (e *ByteArrayEncoding) String() string {
method Encoding (line 23) | func (e *ByteArrayEncoding) Encoding() format.Encoding {
method EncodeByteArray (line 27) | func (e *ByteArrayEncoding) EncodeByteArray(dst []byte, src []byte, of...
method EncodeFixedLenByteArray (line 80) | func (e *ByteArrayEncoding) EncodeFixedLenByteArray(dst []byte, src []...
method DecodeByteArray (line 128) | func (e *ByteArrayEncoding) DecodeByteArray(dst []byte, src []byte, of...
method DecodeFixedLenByteArray (line 152) | func (e *ByteArrayEncoding) DecodeFixedLenByteArray(dst []byte, src []...
method EstimateDecodeByteArraySize (line 180) | func (e *ByteArrayEncoding) EstimateDecodeByteArraySize(src []byte) int {
method wrap (line 189) | func (e *ByteArrayEncoding) wrap(err error) error {
method wrapf (line 196) | func (e *ByteArrayEncoding) wrapf(msg string, args ...interface{}) err...
function linearSearchPrefixLength (line 200) | func linearSearchPrefixLength(base, data []byte) (n int) {
function binarySearchPrefixLength (line 207) | func binarySearchPrefixLength(base, data []byte) int {
FILE: encoding/delta/byte_array_amd64.go
function validatePrefixAndSuffixLengthValuesAVX2 (line 10) | func validatePrefixAndSuffixLengthValuesAVX2(prefix, suffix []int32, max...
function validatePrefixAndSuffixLengthValues (line 12) | func validatePrefixAndSuffixLengthValues(prefix, suffix []int32, maxLeng...
function decodeByteArrayOffsets (line 51) | func decodeByteArrayOffsets(offsets []uint32, prefix, suffix []int32)
function decodeByteArrayAVX2 (line 54) | func decodeByteArrayAVX2(dst, src []byte, prefix, suffix []int32) int
function decodeByteArray (line 56) | func decodeByteArray(dst, src []byte, prefix, suffix []int32, offsets []...
function decodeByteArrayAVX2x128bits (line 111) | func decodeByteArrayAVX2x128bits(dst, src []byte, prefix, suffix []int32...
function decodeFixedLenByteArray (line 113) | func decodeFixedLenByteArray(dst, src []byte, size int, prefix, suffix [...
FILE: encoding/delta/byte_array_purego.go
function decodeByteArray (line 5) | func decodeByteArray(dst, src []byte, prefix, suffix []int32, offsets []...
function decodeFixedLenByteArray (line 36) | func decodeFixedLenByteArray(dst, src []byte, size int, prefix, suffix [...
FILE: encoding/delta/byte_array_test.go
function TestLinearSearchPrefixLength (line 9) | func TestLinearSearchPrefixLength(t *testing.T) {
function TestBinarySearchPrefixLength (line 13) | func TestBinarySearchPrefixLength(t *testing.T) {
function testSearchPrefixLength (line 19) | func testSearchPrefixLength(t *testing.T, prefixLength func(base, data [...
function BenchmarkLinearSearchPrefixLength (line 132) | func BenchmarkLinearSearchPrefixLength(b *testing.B) {
function BenchmarkBinarySearchPrefixLength (line 136) | func BenchmarkBinarySearchPrefixLength(b *testing.B) {
function benchmarkSearchPrefixLength (line 142) | func benchmarkSearchPrefixLength(b *testing.B, prefixLength func(base, d...
FILE: encoding/delta/delta.go
type int32Buffer (line 10) | type int32Buffer struct
method resize (line 14) | func (buf *int32Buffer) resize(size int) {
method decode (line 22) | func (buf *int32Buffer) decode(src []byte) ([]byte, error) {
method sum (line 28) | func (buf *int32Buffer) sum() (sum int32) {
function getInt32Buffer (line 39) | func getInt32Buffer() *int32Buffer {
function putInt32Buffer (line 51) | func putInt32Buffer(b *int32Buffer) {
function resizeNoMemclr (line 55) | func resizeNoMemclr(buf []byte, size int) []byte {
function resize (line 62) | func resize(buf []byte, size int) []byte {
function grow (line 75) | func grow(buf []byte, size int) []byte {
function min (line 85) | func min(a, b int) int {
function errPrefixAndSuffixLengthMismatch (line 92) | func errPrefixAndSuffixLengthMismatch(prefixLength, suffixLength int) er...
function errInvalidNegativeValueLength (line 96) | func errInvalidNegativeValueLength(length int) error {
function errInvalidNegativePrefixLength (line 100) | func errInvalidNegativePrefixLength(length int) error {
function errValueLengthOutOfBounds (line 104) | func errValueLengthOutOfBounds(length, maxLength int) error {
function errPrefixLengthOutOfBounds (line 108) | func errPrefixLengthOutOfBounds(length, maxLength int) error {
FILE: encoding/delta/delta_amd64.go
constant padding (line 6) | padding = 64
function findNegativeLength (line 9) | func findNegativeLength(lengths []int32) int {
FILE: encoding/delta/delta_test.go
function FuzzDeltaBinaryPackedInt32 (line 15) | func FuzzDeltaBinaryPackedInt32(f *testing.F) {
function FuzzDeltaBinaryPackedInt64 (line 19) | func FuzzDeltaBinaryPackedInt64(f *testing.F) {
function FuzzDeltaLengthByteArray (line 23) | func FuzzDeltaLengthByteArray(f *testing.F) {
function FuzzDeltaByteArray (line 27) | func FuzzDeltaByteArray(f *testing.F) {
constant encodeMinNumValues (line 32) | encodeMinNumValues = 0
constant encodeMaxNumValues (line 33) | encodeMaxNumValues = 200
function TestEncodeInt32 (line 36) | func TestEncodeInt32(t *testing.T) {
function TestEncodeInt64 (line 49) | func TestEncodeInt64(t *testing.T) {
FILE: encoding/delta/length_byte_array.go
type LengthByteArrayEncoding (line 8) | type LengthByteArrayEncoding struct
method String (line 12) | func (e *LengthByteArrayEncoding) String() string {
method Encoding (line 16) | func (e *LengthByteArrayEncoding) Encoding() format.Encoding {
method EncodeByteArray (line 20) | func (e *LengthByteArrayEncoding) EncodeByteArray(dst []byte, src []by...
method DecodeByteArray (line 37) | func (e *LengthByteArrayEncoding) DecodeByteArray(dst []byte, src []by...
method EstimateDecodeByteArraySize (line 65) | func (e *LengthByteArrayEncoding) EstimateDecodeByteArraySize(src []by...
method CanDecodeInPlace (line 72) | func (e *LengthByteArrayEncoding) CanDecodeInPlace() bool {
method wrap (line 76) | func (e *LengthByteArrayEncoding) wrap(err error) error {
FILE: encoding/delta/length_byte_array_amd64.go
function encodeByteArrayLengths (line 6) | func encodeByteArrayLengths(lengths []int32, offsets []uint32)
function decodeByteArrayLengths (line 9) | func decodeByteArrayLengths(offsets []uint32, lengths []int32) (lastOffs...
FILE: encoding/delta/length_byte_array_purego.go
function encodeByteArrayLengths (line 5) | func encodeByteArrayLengths(lengths []int32, offsets []uint32) {
function decodeByteArrayLengths (line 11) | func decodeByteArrayLengths(offsets []uint32, lengths []int32) (uint32, ...
FILE: encoding/delta/length_byte_array_test.go
function TestDecodeByteArrayLengths (line 5) | func TestDecodeByteArrayLengths(t *testing.T) {
FILE: encoding/encoding.go
constant MaxFixedLenByteArraySize (line 13) | MaxFixedLenByteArraySize = math.MaxInt16
type Encoding (line 20) | type Encoding interface
FILE: encoding/encoding_test.go
function repeatInt64 (line 22) | func repeatInt64(seq []int64, n int) []int64 {
function TestEncoding (line 206) | func TestEncoding(t *testing.T) {
function testEncoding (line 212) | func testEncoding(t *testing.T, e encoding.Encoding) {
function setBitWidth (line 266) | func setBitWidth(enc encoding.Encoding, bitWidth int) {
type encodingFunc (line 275) | type encodingFunc
function testBooleanEncoding (line 277) | func testBooleanEncoding(t *testing.T, e encoding.Encoding) {
function testLevelsEncoding (line 304) | func testLevelsEncoding(t *testing.T, e encoding.Encoding) {
function testInt32Encoding (line 323) | func testInt32Encoding(t *testing.T, e encoding.Encoding) {
function testInt64Encoding (line 342) | func testInt64Encoding(t *testing.T, e encoding.Encoding) {
function testInt96Encoding (line 361) | func testInt96Encoding(t *testing.T, e encoding.Encoding) {
function testFloatEncoding (line 378) | func testFloatEncoding(t *testing.T, e encoding.Encoding) {
function testDoubleEncoding (line 395) | func testDoubleEncoding(t *testing.T, e encoding.Encoding) {
function testByteArrayEncoding (line 412) | func testByteArrayEncoding(t *testing.T, e encoding.Encoding) {
function testFixedLenByteArrayEncoding (line 446) | func testFixedLenByteArrayEncoding(t *testing.T, e encoding.Encoding) {
function testCanEncodeBoolean (line 463) | func testCanEncodeBoolean(t testing.TB, e encoding.Encoding) {
function testCanEncodeLevels (line 467) | func testCanEncodeLevels(t testing.TB, e encoding.Encoding) {
function testCanEncodeInt32 (line 471) | func testCanEncodeInt32(t testing.TB, e encoding.Encoding) {
function testCanEncodeInt64 (line 475) | func testCanEncodeInt64(t testing.TB, e encoding.Encoding) {
function testCanEncodeInt96 (line 479) | func testCanEncodeInt96(t testing.TB, e encoding.Encoding) {
function testCanEncodeFloat (line 483) | func testCanEncodeFloat(t testing.TB, e encoding.Encoding) {
function testCanEncodeDouble (line 487) | func testCanEncodeDouble(t testing.TB, e encoding.Encoding) {
function testCanEncodeByteArray (line 491) | func testCanEncodeByteArray(t testing.TB, e encoding.Encoding) {
function testCanEncodeFixedLenByteArray (line 495) | func testCanEncodeFixedLenByteArray(t testing.TB, e encoding.Encoding) {
function testCanEncode (line 499) | func testCanEncode(t testing.TB, e encoding.Encoding, test func(encoding...
function assertNoError (line 505) | func assertNoError(t *testing.T, err error) {
function assertEqualBytes (line 512) | func assertEqualBytes(t *testing.T, want, got []byte) {
function assertEqualInt32 (line 519) | func assertEqualInt32(t *testing.T, want, got []int32) {
function assertEqualInt64 (line 524) | func assertEqualInt64(t *testing.T, want, got []int64) {
function assertEqualInt96 (line 529) | func assertEqualInt96(t *testing.T, want, got []deprecated.Int96) {
function assertEqualFloat32 (line 534) | func assertEqualFloat32(t *testing.T, want, got []float32) {
function assertEqualFloat64 (line 539) | func assertEqualFloat64(t *testing.T, want, got []float64) {
constant benchmarkNumValues (line 545) | benchmarkNumValues = 10e3
function newRand (line 548) | func newRand() *rand.Rand {
function BenchmarkEncode (line 552) | func BenchmarkEncode(b *testing.B) {
function benchmarkEncode (line 558) | func benchmarkEncode(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeBoolean (line 600) | func benchmarkEncodeBoolean(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeLevels (line 613) | func benchmarkEncodeLevels(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeInt32 (line 626) | func benchmarkEncodeInt32(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeInt64 (line 639) | func benchmarkEncodeInt64(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeFloat (line 652) | func benchmarkEncodeFloat(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeDouble (line 664) | func benchmarkEncodeDouble(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeByteArray (line 676) | func benchmarkEncodeByteArray(b *testing.B, e encoding.Encoding) {
function benchmarkEncodeFixedLenByteArray (line 689) | func benchmarkEncodeFixedLenByteArray(b *testing.B, e encoding.Encoding) {
function BenchmarkDecode (line 702) | func BenchmarkDecode(b *testing.B) {
function benchmarkDecode (line 708) | func benchmarkDecode(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeBoolean (line 750) | func benchmarkDecodeBoolean(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeLevels (line 763) | func benchmarkDecodeLevels(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeInt32 (line 776) | func benchmarkDecodeInt32(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeInt64 (line 789) | func benchmarkDecodeInt64(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeFloat (line 802) | func benchmarkDecodeFloat(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeDouble (line 814) | func benchmarkDecodeDouble(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeByteArray (line 826) | func benchmarkDecodeByteArray(b *testing.B, e encoding.Encoding) {
function benchmarkDecodeFixedLenByteArray (line 839) | func benchmarkDecodeFixedLenByteArray(b *testing.B, e encoding.Encoding) {
function benchmarkZeroAllocsPerRun (line 852) | func benchmarkZeroAllocsPerRun(b *testing.B, f func()) {
function reportThroughput (line 858) | func reportThroughput(b *testing.B, numValues, numBytes int, do func()) {
function generateLevelValues (line 866) | func generateLevelValues(n int, r *rand.Rand) []uint8 {
function generateBooleanValues (line 874) | func generateBooleanValues(n int, r *rand.Rand) []byte {
function generateInt32Values (line 880) | func generateInt32Values(n int, r *rand.Rand) []int32 {
function generateInt64Values (line 888) | func generateInt64Values(n int, r *rand.Rand) []int64 {
function generateFloatValues (line 896) | func generateFloatValues(n int, r *rand.Rand) []float32 {
function generateDoubleValues (line 904) | func generateDoubleValues(n int, r *rand.Rand) []float64 {
function generateByteArrayValues (line 912) | func generateByteArrayValues(n int, r *rand.Rand) ([]byte, []uint32) {
function generateFixedLenByteArrayValues (line 929) | func generateFixedLenByteArrayValues(n int, r *rand.Rand, size int) []by...
function maxLenInt8 (line 935) | func maxLenInt8(data []int8) int {
function maxLenInt32 (line 945) | func maxLenInt32(data []int32) int {
function maxLenInt64 (line 955) | func maxLenInt64(data []int64) int {
FILE: encoding/fuzz/fuzz.go
function EncodeBoolean (line 16) | func EncodeBoolean(f *testing.F, e encoding.Encoding) {
function EncodeLevels (line 24) | func EncodeLevels(f *testing.F, e encoding.Encoding) {
function EncodeInt32 (line 32) | func EncodeInt32(f *testing.F, e encoding.Encoding) {
function EncodeInt64 (line 40) | func EncodeInt64(f *testing.F, e encoding.Encoding) {
function EncodeFloat (line 48) | func EncodeFloat(f *testing.F, e encoding.Encoding) {
function EncodeDouble (line 56) | func EncodeDouble(f *testing.F, e encoding.Encoding) {
function EncodeByteArray (line 64) | func EncodeByteArray(f *testing.F, e encoding.Encoding) {
type encodingFunc (line 122) | type encodingFunc
type decodingFunc (line 124) | type decodingFunc
type generateFunc (line 126) | type generateFunc
function encode (line 128) | func encode[T comparable](f *testing.F, e encoding.Encoding, encode enco...
function equal (line 160) | func equal[T comparable](a, b []T) bool {
function generate (line 172) | func generate[T comparable](dst []T, src []byte, prng *rand.Rand) []T {
FILE: encoding/notsupported.go
function Error (line 31) | func Error(e Encoding, err error) error {
function Errorf (line 37) | func Errorf(e Encoding, msg string, args ...interface{}) error {
function ErrEncodeInvalidInputSize (line 43) | func ErrEncodeInvalidInputSize(e Encoding, typ string, size int) error {
function ErrDecodeInvalidInputSize (line 49) | func ErrDecodeInvalidInputSize(e Encoding, typ string, size int) error {
function errInvalidInputSize (line 53) | func errInvalidInputSize(e Encoding, op, typ string, size int) error {
function CanEncodeLevels (line 58) | func CanEncodeLevels(e Encoding) bool {
function CanEncodeBoolean (line 64) | func CanEncodeBoolean(e Encoding) bool {
function CanEncodeInt32 (line 70) | func CanEncodeInt32(e Encoding) bool {
function CanEncodeInt64 (line 76) | func CanEncodeInt64(e Encoding) bool {
function CanEncodeInt96 (line 82) | func CanEncodeInt96(e Encoding) bool {
function CanEncodeFloat (line 88) | func CanEncodeFloat(e Encoding) bool {
function CanEncodeDouble (line 94) | func CanEncodeDouble(e Encoding) bool {
function CanEncodeByteArray (line 100) | func CanEncodeByteArray(e Encoding) bool {
function CanEncodeFixedLenByteArray (line 107) | func CanEncodeFixedLenByteArray(e Encoding) bool {
type NotSupported (line 116) | type NotSupported struct
method String (line 119) | func (NotSupported) String() string {
method Encoding (line 123) | func (NotSupported) Encoding() format.Encoding {
method EncodeLevels (line 127) | func (NotSupported) EncodeLevels(dst []byte, src []uint8) ([]byte, err...
method EncodeBoolean (line 131) | func (NotSupported) EncodeBoolean(dst []byte, src []byte) ([]byte, err...
method EncodeInt32 (line 135) | func (NotSupported) EncodeInt32(dst []byte, src []int32) ([]byte, erro...
method EncodeInt64 (line 139) | func (NotSupported) EncodeInt64(dst []byte, src []int64) ([]byte, erro...
method EncodeInt96 (line 143) | func (NotSupported) EncodeInt96(dst []byte, src []deprecated.Int96) ([...
method EncodeFloat (line 147) | func (NotSupported) EncodeFloat(dst []byte, src []float32) ([]byte, er...
method EncodeDouble (line 151) | func (NotSupported) EncodeDouble(dst []byte, src []float64) ([]byte, e...
method EncodeByteArray (line 155) | func (NotSupported) EncodeByteArray(dst []byte, src []byte, offsets []...
method EncodeFixedLenByteArray (line 159) | func (NotSupported) EncodeFixedLenByteArray(dst []byte, src []byte, si...
method DecodeLevels (line 163) | func (NotSupported) DecodeLevels(dst []uint8, src []byte) ([]uint8, er...
method DecodeBoolean (line 167) | func (NotSupported) DecodeBoolean(dst []byte, src []byte) ([]byte, err...
method DecodeInt32 (line 171) | func (NotSupported) DecodeInt32(dst []int32, src []byte) ([]int32, err...
method DecodeInt64 (line 175) | func (NotSupported) DecodeInt64(dst []int64, src []byte) ([]int64, err...
method DecodeInt96 (line 179) | func (NotSupported) DecodeInt96(dst []deprecated.Int96, src []byte) ([...
method DecodeFloat (line 183) | func (NotSupported) DecodeFloat(dst []float32, src []byte) ([]float32,...
method DecodeDouble (line 187) | func (NotSupported) DecodeDouble(dst []float64, src []byte) ([]float64...
method DecodeByteArray (line 191) | func (NotSupported) DecodeByteArray(dst []byte, src []byte, offsets []...
method DecodeFixedLenByteArray (line 195) | func (NotSupported) DecodeFixedLenByteArray(dst []byte, src []byte, si...
method EstimateDecodeByteArraySize (line 199) | func (NotSupported) EstimateDecodeByteArraySize(src []byte) int {
method CanDecodeInPlace (line 203) | func (NotSupported) CanDecodeInPlace() bool {
function errNotSupported (line 207) | func errNotSupported(typ string) error {
FILE: encoding/plain/dictionary.go
type DictionaryEncoding (line 8) | type DictionaryEncoding struct
method String (line 13) | func (e *DictionaryEncoding) String() string {
method Encoding (line 17) | func (e *DictionaryEncoding) Encoding() format.Encoding {
method EncodeInt32 (line 21) | func (e *DictionaryEncoding) EncodeInt32(dst []byte, src []int32) ([]b...
method DecodeInt32 (line 25) | func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]i...
FILE: encoding/plain/plain.go
constant ByteArrayLengthSize (line 19) | ByteArrayLengthSize = 4
constant MaxByteArrayLength (line 20) | MaxByteArrayLength = math.MaxInt32
type Encoding (line 23) | type Encoding struct
method String (line 27) | func (e *Encoding) String() string {
method Encoding (line 31) | func (e *Encoding) Encoding() format.Encoding {
method EncodeBoolean (line 35) | func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, erro...
method EncodeInt32 (line 39) | func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
method EncodeInt64 (line 43) | func (e *Encoding) EncodeInt64(dst []byte, src []int64) ([]byte, error) {
method EncodeInt96 (line 47) | func (e *Encoding) EncodeInt96(dst []byte, src []deprecated.Int96) ([]...
method EncodeFloat (line 51) | func (e *Encoding) EncodeFloat(dst []byte, src []float32) ([]byte, err...
method EncodeDouble (line 55) | func (e *Encoding) EncodeDouble(dst []byte, src []float64) ([]byte, er...
method EncodeByteArray (line 59) | func (e *Encoding) EncodeByteArray(dst []byte, src []byte, offsets []u...
method EncodeFixedLenByteArray (line 74) | func (e *Encoding) EncodeFixedLenByteArray(dst []byte, src []byte, siz...
method DecodeBoolean (line 81) | func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, erro...
method DecodeInt32 (line 85) | func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, erro...
method DecodeInt64 (line 92) | func (e *Encoding) DecodeInt64(dst []int64, src []byte) ([]int64, erro...
method DecodeInt96 (line 99) | func (e *Encoding) DecodeInt96(dst []deprecated.Int96, src []byte) ([]...
method DecodeFloat (line 106) | func (e *Encoding) DecodeFloat(dst []float32, src []byte) ([]float32, ...
method DecodeDouble (line 113) | func (e *Encoding) DecodeDouble(dst []float64, src []byte) ([]float64,...
method DecodeByteArray (line 120) | func (e *Encoding) DecodeByteArray(dst []byte, src []byte, offsets []u...
method DecodeFixedLenByteArray (line 140) | func (e *Encoding) DecodeFixedLenByteArray(dst []byte, src []byte, siz...
method EstimateDecodeByteArraySize (line 150) | func (e *Encoding) EstimateDecodeByteArraySize(src []byte) int {
method CanDecodeInPlace (line 154) | func (e *Encoding) CanDecodeInPlace() bool {
function Boolean (line 158) | func Boolean(v bool) []byte { return AppendBoolean(nil, 0, v) }
function Int32 (line 160) | func Int32(v int32) []byte { return AppendInt32(nil, v) }
function Int64 (line 162) | func Int64(v int64) []byte { return AppendInt64(nil, v) }
function Int96 (line 164) | func Int96(v deprecated.Int96) []byte { return AppendInt96(nil, v) }
function Float (line 166) | func Float(v float32) []byte { return AppendFloat(nil, v) }
function Double (line 168) | func Double(v float64) []byte { return AppendDouble(nil, v) }
function ByteArray (line 170) | func ByteArray(v []byte) []byte { return AppendByteArray(nil, v) }
function AppendBoolean (line 172) | func AppendBoolean(b []byte, n int, v bool) []byte {
function AppendInt32 (line 194) | func AppendInt32(b []byte, v int32) []byte {
function AppendInt64 (line 200) | func AppendInt64(b []byte, v int64) []byte {
function AppendInt96 (line 206) | func AppendInt96(b []byte, v deprecated.Int96) []byte {
function AppendFloat (line 214) | func AppendFloat(b []byte, v float32) []byte {
function AppendDouble (line 220) | func AppendDouble(b []byte, v float64) []byte {
function AppendByteArray (line 226) | func AppendByteArray(b, v []byte) []byte {
function AppendByteArrayString (line 234) | func AppendByteArrayString(b []byte, v string) []byte {
function AppendByteArrayLength (line 242) | func AppendByteArrayLength(b []byte, n int) []byte {
function ByteArrayLength (line 248) | func ByteArrayLength(b []byte) int {
function PutByteArrayLength (line 252) | func PutByteArrayLength(b []byte, n int) {
function RangeByteArray (line 256) | func RangeByteArray(b []byte, do func([]byte) error) (err error) {
function NextByteArray (line 269) | func NextByteArray(b []byte) (v, r []byte, err error) {
function ErrTooShort (line 284) | func ErrTooShort(length int) error {
function ErrTooLarge (line 288) | func ErrTooLarge(length int) error {
FILE: encoding/plain/plain_test.go
function TestAppendBoolean (line 10) | func TestAppendBoolean(t *testing.T) {
FILE: encoding/rle/dictionary.go
type DictionaryEncoding (line 11) | type DictionaryEncoding struct
method String (line 15) | func (e *DictionaryEncoding) String() string {
method Encoding (line 19) | func (e *DictionaryEncoding) Encoding() format.Encoding {
method EncodeInt32 (line 23) | func (e *DictionaryEncoding) EncodeInt32(dst []byte, src []int32) ([]b...
method DecodeInt32 (line 30) | func (e *DictionaryEncoding) DecodeInt32(dst []int32, src []byte) ([]i...
method wrap (line 39) | func (e *DictionaryEncoding) wrap(err error) error {
function clearInt32 (line 46) | func clearInt32(data []int32) {
function maxLenInt32 (line 52) | func maxLenInt32(data []int32) (max int) {
FILE: encoding/rle/rle.go
constant maxSupportedValueCount (line 28) | maxSupportedValueCount = 16 * 1024 * 1024
type Encoding (line 31) | type Encoding struct
method String (line 36) | func (e *Encoding) String() string {
method Encoding (line 40) | func (e *Encoding) Encoding() format.Encoding {
method EncodeLevels (line 44) | func (e *Encoding) EncodeLevels(dst []byte, src []uint8) ([]byte, erro...
method EncodeBoolean (line 49) | func (e *Encoding) EncodeBoolean(dst []byte, src []byte) ([]byte, erro...
method EncodeInt32 (line 59) | func (e *Encoding) EncodeInt32(dst []byte, src []int32) ([]byte, error) {
method DecodeLevels (line 64) | func (e *Encoding) DecodeLevels(dst []uint8, src []byte) ([]uint8, err...
method DecodeBoolean (line 69) | func (e *Encoding) DecodeBoolean(dst []byte, src []byte) ([]byte, erro...
method DecodeInt32 (line 85) | func (e *Encoding) DecodeInt32(dst []int32, src []byte) ([]int32, erro...
method wrap (line 91) | func (e *Encoding) wrap(err error) error {
function encodeBits (line 98) | func encodeBits(dst, src []byte) ([]byte, error) {
function encodeBytes (line 142) | func encodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) {
function encodeInt32 (line 194) | func encodeInt32(dst []byte, src []int32, bitWidth uint) ([]byte, error) {
function decodeBits (line 242) | func decodeBits(dst, src []byte) ([]byte, error) {
function decodeBytes (line 283) | func decodeBytes(dst, src []byte, bitWidth uint) ([]byte, error) {
function decodeInt32 (line 337) | func decodeInt32(dst, src []byte, bitWidth uint) ([]byte, error) {
function errEncodeInvalidBitWidth (line 396) | func errEncodeInvalidBitWidth(typ string, bitWidth uint) error {
function errDecodeInvalidBitWidth (line 400) | func errDecodeInvalidBitWidth(typ string, bitWidth uint) error {
function errInvalidBitWidth (line 404) | func errInvalidBitWidth(op, typ string, bitWidth uint) error {
function appendRepeat (line 408) | func appendRepeat(dst, pattern []byte, count uint) []byte {
function appendUvarint (line 419) | func appendUvarint(dst []byte, u uint64) []byte {
function appendRunLengthBits (line 425) | func appendRunLengthBits(dst []byte, count int, value byte) []byte {
function appendBitPackedBits (line 429) | func appendBitPackedBits(dst []byte, words []byte) []byte {
function appendRunLengthBytes (line 437) | func appendRunLengthBytes(dst []byte, count int, value byte) []byte {
function appendBitPackedBytes (line 445) | func appendBitPackedBytes(dst []byte, words []uint64, bitWidth uint) []b...
function appendRunLengthInt32 (line 453) | func appendRunLengthInt32(dst []byte, count int, value int32, bitWidth u...
function appendBitPackedInt32 (line 461) | func appendBitPackedInt32(dst []byte, words [][8]int32, bitWidth uint) [...
function broadcast8x1 (line 469) | func broadcast8x1(v uint64) uint64 {
function broadcast8x4 (line 473) | func broadcast8x4(v int32) [8]int32 {
function isZero (line 477) | func isZero(data []byte) bool {
function isOnes (line 481) | func isOnes(data []byte) bool {
function resize (line 485) | func resize(buf []byte, size int) []byte {
function grow (line 492) | func grow(buf []byte, size int) []byte {
function encodeInt32BitpackDefault (line 502) | func encodeInt32BitpackDefault(dst []byte, src [][8]int32, bitWidth uint...
function encodeBytesBitpackDefault (line 508) | func encodeBytesBitpackDefault(dst []byte, src []uint64, bitWidth uint) ...
function decodeBytesBitpackDefault (line 528) | func decodeBytesBitpackDefault(dst, src []byte, count, bitWidth uint) {
FILE: encoding/rle/rle_amd64.go
function init (line 16) | func init() {
function encodeBytesBitpackBMI2 (line 37) | func encodeBytesBitpackBMI2(dst []byte, src []uint64, bitWidth uint) int
function encodeInt32IndexEqual8ContiguousAVX2 (line 40) | func encodeInt32IndexEqual8ContiguousAVX2(words [][8]int32) int
function encodeInt32IndexEqual8ContiguousSSE (line 43) | func encodeInt32IndexEqual8ContiguousSSE(words [][8]int32) int
function encodeInt32Bitpack1to16bitsAVX2 (line 46) | func encodeInt32Bitpack1to16bitsAVX2(dst []byte, src [][8]int32, bitWidt...
function encodeInt32BitpackAVX2 (line 48) | func encodeInt32BitpackAVX2(dst []byte, src [][8]int32, bitWidth uint) i...
function decodeBytesBitpackBMI2 (line 60) | func decodeBytesBitpackBMI2(dst, src []byte, count, bitWidth uint)
FILE: encoding/rle/rle_amd64_test.go
function TestEncodeInt32IndexEqual8ContiguousAVX2 (line 8) | func TestEncodeInt32IndexEqual8ContiguousAVX2(t *testing.T) {
function TestEncodeInt32IndexEqual8ContiguousSSE (line 12) | func TestEncodeInt32IndexEqual8ContiguousSSE(t *testing.T) {
function BenchmarkEncodeInt32IndexEqual8ContiguousAVX2 (line 16) | func BenchmarkEncodeInt32IndexEqual8ContiguousAVX2(b *testing.B) {
function BenchmarkEncodeInt32IndexEqual8ContiguousSSE (line 20) | func BenchmarkEncodeInt32IndexEqual8ContiguousSSE(b *testing.B) {
FILE: encoding/rle/rle_purego.go
function encodeBytesBitpack (line 5) | func encodeBytesBitpack(dst []byte, src []uint64, bitWidth uint) int {
function encodeInt32IndexEqual8Contiguous (line 9) | func encodeInt32IndexEqual8Contiguous(words [][8]int32) (n int) {
function encodeInt32Bitpack (line 16) | func encodeInt32Bitpack(dst []byte, src [][8]int32, bitWidth uint) int {
function decodeBytesBitpack (line 20) | func decodeBytesBitpack(dst, src []byte, count, bitWidth uint) {
FILE: encoding/rle/rle_test.go
function FuzzEncodeBoolean (line 13) | func FuzzEncodeBoolean(f *testing.F) {
function FuzzEncodeLevels (line 17) | func FuzzEncodeLevels(f *testing.F) {
function FuzzEncodeInt32 (line 21) | func FuzzEncodeInt32(f *testing.F) {
function TestEncodeInt32IndexEqual8Contiguous (line 25) | func TestEncodeInt32IndexEqual8Contiguous(t *testing.T) {
function testEncodeInt32IndexEqual8Contiguous (line 29) | func testEncodeInt32IndexEqual8Contiguous(t *testing.T, f func([][8]int3...
function BenchmarkEncodeInt32IndexEqual8Contiguous (line 51) | func BenchmarkEncodeInt32IndexEqual8Contiguous(b *testing.B) {
function benchmarkEncodeInt32IndexEqual8Contiguous (line 55) | func benchmarkEncodeInt32IndexEqual8Contiguous(b *testing.B, f func([][8...
FILE: encoding/test/test_go18.go
function EncodeInt32 (line 12) | func EncodeInt32(t *testing.T, enc encoding.Encoding, min, max int, bitW...
function EncodeInt64 (line 28) | func EncodeInt64(t *testing.T, enc encoding.Encoding, min, max int, bitW...
function EncodeFloat (line 44) | func EncodeFloat(t *testing.T, enc encoding.Encoding, min, max int) {
function EncodeDouble (line 53) | func EncodeDouble(t *testing.T, enc encoding.Encoding, min, max int) {
type encodingFunc (line 62) | type encodingFunc
type decodingFunc (line 64) | type decodingFunc
function encode (line 66) | func encode[T comparable](t *testing.T, enc encoding.Encoding, min, max ...
function assertEqual (line 93) | func assertEqual[T comparable](want, got []T) error {
FILE: encoding/values.go
type Kind (line 10) | type Kind
method String (line 24) | func (kind Kind) String() string {
constant Undefined (line 13) | Undefined Kind = iota
constant Boolean (line 14) | Boolean
constant Int32 (line 15) | Int32
constant Int64 (line 16) | Int64
constant Int96 (line 17) | Int96
constant Float (line 18) | Float
constant Double (line 19) | Double
constant ByteArray (line 20) | ByteArray
constant FixedLenByteArray (line 21) | FixedLenByteArray
type Values (line 47) | type Values struct
method assertKind (line 54) | func (v *Values) assertKind(kind Kind) {
method assertSize (line 60) | func (v *Values) assertSize(size int) {
method Size (line 66) | func (v *Values) Size() int64 {
method Kind (line 70) | func (v *Values) Kind() Kind {
method Data (line 74) | func (v *Values) Data() (data []byte, offsets []uint32) {
method Boolean (line 78) | func (v *Values) Boolean() []byte {
method Int32 (line 83) | func (v *Values) Int32() []int32 {
method Int64 (line 88) | func (v *Values) Int64() []int64 {
method Int96 (line 93) | func (v *Values) Int96() []deprecated.Int96 {
method Float (line 98) | func (v *Values) Float() []float32 {
method Double (line 103) | func (v *Values) Double() []float64 {
method ByteArray (line 108) | func (v *Values) ByteArray() (data []byte, offsets []uint32) {
method FixedLenByteArray (line 113) | func (v *Values) FixedLenByteArray() (data []byte, size int) {
method Uint32 (line 118) | func (v *Values) Uint32() []uint32 {
method Uint64 (line 123) | func (v *Values) Uint64() []uint64 {
method Uint128 (line 128) | func (v *Values) Uint128() [][16]byte {
function BooleanValues (line 134) | func BooleanValues(values []byte) Values {
function Int32Values (line 141) | func Int32Values(values []int32) Values {
function Int64Values (line 148) | func Int64Values(values []int64) Values {
function Int96Values (line 155) | func Int96Values(values []deprecated.Int96) Values {
function FloatValues (line 162) | func FloatValues(values []float32) Values {
function DoubleValues (line 169) | func DoubleValues(values []float64) Values {
function ByteArrayValues (line 176) | func ByteArrayValues(values []byte, offsets []uint32) Values {
function FixedLenByteArrayValues (line 184) | func FixedLenByteArrayValues(values []byte, size int) Values {
function Uint32Values (line 192) | func Uint32Values(values []uint32) Values {
function Uint64Values (line 196) | func Uint64Values(values []uint64) Values {
function Uint128Values (line 200) | func Uint128Values(values [][16]byte) Values {
function Int32ValuesFromBytes (line 204) | func Int32ValuesFromBytes(values []byte) Values {
function Int64ValuesFromBytes (line 211) | func Int64ValuesFromBytes(values []byte) Values {
function Int96ValuesFromBytes (line 218) | func Int96ValuesFromBytes(values []byte) Values {
function FloatValuesFromBytes (line 225) | func FloatValuesFromBytes(values []byte) Values {
functi
Condensed preview — 321 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,707K chars).
[
{
"path": ".gitattributes",
"chars": 41,
"preview": "internal/gen-go/* linguist-generated=true"
},
{
"path": ".github/workflows/test.yml",
"chars": 1457,
"preview": "name: Test\non:\n push:\n branches:\n - main\n pull_request:\n branches:\n - '*'\n\njobs:\n test:\n strategy:"
},
{
"path": ".gitignore",
"chars": 288,
"preview": "# Binaries for programs and plugins\n*.exe\n*.exe~\n*.dll\n*.so\n*.dylib\n\n# Test binary, built with `go test -c`\n*.test\n\n# Ou"
},
{
"path": ".mailmap",
"chars": 162,
"preview": "Achille Roussel <achille@segment.com> Achille <achille@segment.com>\nThomas Pelletier <thomas.pelletier@segment.com> Thom"
},
{
"path": ".words",
"chars": 228,
"preview": "\nRowType\nTwilio\nbottlenecked\ndecompressors\nint96\nmillis\nnanos\nreindexing\nrepositions\nschemas\nColumnPages\nPageIndex\nZstan"
},
{
"path": "AUTHORS.txt",
"chars": 198,
"preview": "Achille Roussel <achille@segment.com>\nFrederic Branczyk <fbranczyk@gmail.com>\nJulien Fabre <julien@segment.com>\nKevin Bu"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 3251,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, w"
},
{
"path": "CONTRIBUTING.md",
"chars": 896,
"preview": "# Contributing to segmentio/parquet\n\n## Code of Conduct\n\nHelp us keep the project open and inclusive. Please be kind to "
},
{
"path": "LICENSE",
"chars": 11769,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "Makefile",
"chars": 246,
"preview": ".PHONY: format\n\nAUTHORS.txt: .mailmap\n\tgo install github.com/kevinburke/write_mailmap@latest\n\twrite_mailmap > AUTHORS.tx"
},
{
"path": "README.md",
"chars": 321,
"preview": "# Project has been Archived\n\nDevelopment has moved to https://github.com/parquet-go/parquet-go. No API's have\nchanged, w"
},
{
"path": "allocator.go",
"chars": 1521,
"preview": "package parquet\n\nimport \"github.com/segmentio/parquet-go/internal/unsafecast\"\n\ntype allocator struct{ buffer []byte }\n\nf"
},
{
"path": "array.go",
"chars": 664,
"preview": "package parquet\n\nimport (\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/sparse\"\n)\n\nfunc makeArrayValue(values []Value, of"
},
{
"path": "array_go18.go",
"chars": 661,
"preview": "//go:build go1.18\n\npackage parquet\n\nimport (\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n\t\"github."
},
{
"path": "bitmap.go",
"chars": 607,
"preview": "package parquet\n\nimport \"sync\"\n\ntype bitmap struct {\n\tbits []uint64\n}\n\nfunc (m *bitmap) reset(size int) {\n\tsize = (size "
},
{
"path": "bloom/block.go",
"chars": 580,
"preview": "package bloom\n\nimport \"unsafe\"\n\n// Word represents 32 bits words of bloom filter blocks.\ntype Word uint32\n\n// Block repr"
},
{
"path": "bloom/block_amd64.go",
"chars": 1335,
"preview": "//go:build !purego\n\npackage bloom\n\nimport \"golang.org/x/sys/cpu\"\n\n// The functions in this file are SIMD-optimized versi"
},
{
"path": "bloom/block_amd64.s",
"chars": 3253,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n#define salt0 0x47b6137b\n#define salt1 0x44974d91\n#define salt2 0x8824ad5b\n#d"
},
{
"path": "bloom/block_default.go",
"chars": 1249,
"preview": "//go:build purego && parquet.bloom.no_unroll\n\npackage bloom\n\n// This file contains direct translation of the algorithms "
},
{
"path": "bloom/block_optimized.go",
"chars": 1809,
"preview": "//go:build (!amd64 || purego) && !parquet.bloom.no_unroll\n\npackage bloom\n\n// The functions in this file are optimized ve"
},
{
"path": "bloom/block_test.go",
"chars": 960,
"preview": "package bloom_test\n\nimport (\n\t\"math\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/bloom\"\n)\n\nfunc TestBlock(t *testing.T"
},
{
"path": "bloom/bloom.go",
"chars": 421,
"preview": "// Package bloom implements parquet bloom filters.\npackage bloom\n\nfunc fasthash1x64(value uint64, scale int32) uint64 {\n"
},
{
"path": "bloom/bloom_test.go",
"chars": 702,
"preview": "package bloom\n\nimport (\n\t\"math/rand\"\n\t\"testing\"\n)\n\n// Test file for internal functions of the bloom package.\nvar global4"
},
{
"path": "bloom/filter.go",
"chars": 2841,
"preview": "package bloom\n\nimport (\n\t\"io\"\n\t\"sync\"\n\t\"unsafe\"\n)\n\n// Filter is an interface representing read-only bloom filters where "
},
{
"path": "bloom/filter_amd64.go",
"chars": 1314,
"preview": "//go:build !purego\n\npackage bloom\n\n// This file contains the signatures for bloom filter algorithms implemented in\n// fi"
},
{
"path": "bloom/filter_amd64.s",
"chars": 5638,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n#define salt0 0x47b6137b\n#define salt1 0x44974d91\n#define salt2 0x8824ad5b\n#d"
},
{
"path": "bloom/filter_default.go",
"chars": 344,
"preview": "//go:build purego || !amd64\n\npackage bloom\n\nfunc filterInsertBulk(f []Block, x []uint64) {\n\tfor i := range x {\n\t\tfilterI"
},
{
"path": "bloom/filter_test.go",
"chars": 3255,
"preview": "package bloom_test\n\nimport (\n\t\"bytes\"\n\t\"math/rand\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/bloom\"\n)\n\nfunc TestSpli"
},
{
"path": "bloom/hash.go",
"chars": 2056,
"preview": "package bloom\n\nimport \"github.com/segmentio/parquet-go/bloom/xxhash\"\n\n// Hash is an interface abstracting the hashing al"
},
{
"path": "bloom/xxhash/LICENSE",
"chars": 1310,
"preview": "The following files in this directory were derived from the open-source\nproject at https://github.com/cespare/xxhash. A "
},
{
"path": "bloom/xxhash/sum64uint.go",
"chars": 750,
"preview": "package xxhash\n\nfunc Sum64Uint8(v uint8) uint64 {\n\th := prime5 + 1\n\th ^= uint64(v) * prime5\n\treturn avalanche(rol11(h) *"
},
{
"path": "bloom/xxhash/sum64uint_amd64.go",
"chars": 1916,
"preview": "//go:build !purego\n\npackage xxhash\n\nimport \"golang.org/x/sys/cpu\"\n\n// This file contains the declaration of signatures f"
},
{
"path": "bloom/xxhash/sum64uint_amd64.s",
"chars": 19871,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n/*\nThe algorithms in this file are assembly versions of the Go functions in t"
},
{
"path": "bloom/xxhash/sum64uint_purego.go",
"chars": 924,
"preview": "//go:build purego || !amd64\n\npackage xxhash\n\nfunc MultiSum64Uint8(h []uint64, v []uint8) int {\n\tn := min(len(h), len(v))"
},
{
"path": "bloom/xxhash/sum64uint_test.go",
"chars": 6130,
"preview": "package xxhash_test\n\nimport (\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"testing\"\n\t\"testing/quick\"\n\t\"time\"\n\n\t\"github.com/segmentio/parq"
},
{
"path": "bloom/xxhash/xxhash.go",
"chars": 1601,
"preview": "// Package xxhash is an extension of github.com/cespare/xxhash which adds\n// routines optimized to hash arrays of fixed "
},
{
"path": "bloom/xxhash/xxhash_amd64.go",
"chars": 113,
"preview": "//go:build !purego\n\npackage xxhash\n\n// Sum64 computes the 64-bit xxHash digest of b.\nfunc Sum64(b []byte) uint64\n"
},
{
"path": "bloom/xxhash/xxhash_amd64.s",
"chars": 2870,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n#define PRIME1 0x9E3779B185EBCA87\n#define PRIME2 0xC2B2AE3D27D4EB4F\n#define P"
},
{
"path": "bloom/xxhash/xxhash_purego.go",
"chars": 978,
"preview": "//go:build purego || !amd64\n\npackage xxhash\n\n// Sum64 computes the 64-bit xxHash digest of b.\nfunc Sum64(b []byte) uint6"
},
{
"path": "bloom/xxhash/xxhash_test.go",
"chars": 1145,
"preview": "package xxhash_test\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/bloom/xxhash\"\n)\n\nfunc TestSum64(t *testing.T"
},
{
"path": "bloom.go",
"chars": 8359,
"preview": "package parquet\n\nimport (\n\t\"io\"\n\n\t\"github.com/segmentio/parquet-go/bloom\"\n\t\"github.com/segmentio/parquet-go/bloom/xxhash"
},
{
"path": "bloom_test.go",
"chars": 3754,
"preview": "package parquet\n\nimport (\n\t\"math/rand\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/bloom\"\n\t\"github.com/segmentio/parqu"
},
{
"path": "buffer.go",
"chars": 14875,
"preview": "package parquet\n\nimport (\n\t\"log\"\n\t\"runtime\"\n\t\"sort\"\n\t\"sync\"\n\t\"sync/atomic\"\n\n\t\"github.com/segmentio/parquet-go/internal/d"
},
{
"path": "buffer_go18.go",
"chars": 4567,
"preview": "//go:build go1.18\n\npackage parquet\n\nimport (\n\t\"reflect\"\n\t\"sort\"\n)\n\n// GenericBuffer is similar to a Buffer but uses a ty"
},
{
"path": "buffer_go18_test.go",
"chars": 6302,
"preview": "//go:build go1.18\n\npackage parquet_test\n\nimport (\n\t\"encoding/binary\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"math/rand\"\n\t\"reflect\"\n\t\"so"
},
{
"path": "buffer_internal_test.go",
"chars": 1606,
"preview": "package parquet\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"testing\"\n)\n\nfunc TestBufferAlwaysCorrectSize(t *testing.T) {\n\tvar p buff"
},
{
"path": "buffer_pool.go",
"chars": 4606,
"preview": "package parquet\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"sync\"\n)\n\n// BufferPool is an interface abstracting the u"
},
{
"path": "buffer_pool_test.go",
"chars": 2653,
"preview": "package parquet_test\n\nimport (\n\t\"bytes\"\n\t\"io\"\n\t\"strings\"\n\t\"testing\"\n\t\"testing/iotest\"\n\n\t\"github.com/segmentio/parquet-go"
},
{
"path": "buffer_test.go",
"chars": 17084,
"preview": "package parquet_test\n\nimport (\n\t\"bytes\"\n\t\"errors\"\n\t\"io\"\n\t\"math\"\n\t\"math/rand\"\n\t\"reflect\"\n\t\"sort\"\n\t\"strconv\"\n\t\"testing\"\n\n\t"
},
{
"path": "column.go",
"chars": 23886,
"preview": "package parquet\n\nimport (\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"io\"\n\t\"reflect\"\n\n\t\"github.com/segmentio/parquet-go/compress\"\n\t\"gith"
},
{
"path": "column_buffer.go",
"chars": 58601,
"preview": "package parquet\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"io\"\n\t\"sort\"\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n\t\"githu"
},
{
"path": "column_buffer_amd64.go",
"chars": 679,
"preview": "//go:build !purego\n\npackage parquet\n\nimport (\n\t\"github.com/segmentio/parquet-go/internal/bytealg\"\n\t\"github.com/segmentio"
},
{
"path": "column_buffer_amd64.s",
"chars": 1338,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// func broadcastRangeInt32AVX2(dst []int32, base int32)\nTEXT ·broadcastRange"
},
{
"path": "column_buffer_go18.go",
"chars": 12604,
"preview": "//go:build go1.18\n\npackage parquet\n\nimport (\n\t\"encoding/json\"\n\t\"math/bits\"\n\t\"reflect\"\n\t\"time\"\n\t\"unsafe\"\n\n\t\"github.com/se"
},
{
"path": "column_buffer_purego.go",
"chars": 537,
"preview": "//go:build !amd64 || purego\n\npackage parquet\n\nimport \"github.com/segmentio/parquet-go/sparse\"\n\nfunc broadcastValueInt32("
},
{
"path": "column_buffer_test.go",
"chars": 1733,
"preview": "package parquet\n\nimport (\n\t\"testing\"\n)\n\nfunc TestBroadcastValueInt32(t *testing.T) {\n\tbuf := make([]int32, 123)\n\tbroadca"
},
{
"path": "column_chunk.go",
"chars": 6456,
"preview": "package parquet\n\nimport (\n\t\"io\"\n)\n\n// The ColumnChunk interface represents individual columns of a row group.\ntype Colum"
},
{
"path": "column_index.go",
"chars": 22969,
"preview": "package parquet\n\nimport (\n\t\"github.com/segmentio/parquet-go/deprecated\"\n\t\"github.com/segmentio/parquet-go/encoding/plain"
},
{
"path": "column_index_internal_test.go",
"chars": 635,
"preview": "package parquet\n\nimport (\n\t\"bytes\"\n\t\"testing\"\n)\n\nfunc TestIncrementByteArrayInplace(t *testing.T) {\n\ttestCases := [][]by"
},
{
"path": "column_index_test.go",
"chars": 2360,
"preview": "package parquet_test\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n\nfunc TestBinaryColumnIndexMinMax(t *tes"
},
{
"path": "column_mapping.go",
"chars": 1994,
"preview": "package parquet\n\n// LeafColumn is a struct type representing leaf columns of a parquet schema.\ntype LeafColumn struct {\n"
},
{
"path": "column_mapping_test.go",
"chars": 628,
"preview": "package parquet_test\n\nimport (\n\t\"fmt\"\n\t\"strings\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n\nfunc ExampleSchema_Lookup() {\n\ts"
},
{
"path": "column_path.go",
"chars": 2232,
"preview": "package parquet\n\nimport (\n\t\"strings\"\n)\n\ntype columnPath []string\n\nfunc (path columnPath) append(names ...string) columnP"
},
{
"path": "column_test.go",
"chars": 15156,
"preview": "package parquet_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"testing\"\n\n\t\"github.com/google/uuid\"\n\t\"github.com/segmentio/parquet-"
},
{
"path": "compare.go",
"chars": 7208,
"preview": "package parquet\n\nimport (\n\t\"encoding/binary\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n)\n\n// CompareDescending cons"
},
{
"path": "compare_test.go",
"chars": 1114,
"preview": "package parquet\n\nimport \"testing\"\n\nfunc assertCompare(t *testing.T, a, b Value, cmp func(Value, Value) int, want int) {\n"
},
{
"path": "compress/brotli/brotli.go",
"chars": 1312,
"preview": "// Package brotli implements the BROTLI parquet compression codec.\npackage brotli\n\nimport (\n\t\"io\"\n\n\t\"github.com/andybalh"
},
{
"path": "compress/compress.go",
"chars": 3051,
"preview": "// Package compress provides the generic APIs implemented by parquet compression\n// codecs.\n//\n// https://github.com/apa"
},
{
"path": "compress/compress_test.go",
"chars": 3529,
"preview": "package compress_test\n\nimport (\n\t\"bytes\"\n\t\"io\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/compress\"\n\t\"github.com/segm"
},
{
"path": "compress/gzip/gzip.go",
"chars": 1415,
"preview": "// Package gzip implements the GZIP parquet compression codec.\npackage gzip\n\nimport (\n\t\"io\"\n\t\"strings\"\n\n\t\"github.com/kla"
},
{
"path": "compress/lz4/lz4.go",
"chars": 1961,
"preview": "// Package lz4 implements the LZ4_RAW parquet compression codec.\npackage lz4\n\nimport (\n\t\"github.com/pierrec/lz4/v4\"\n\t\"gi"
},
{
"path": "compress/snappy/snappy.go",
"chars": 848,
"preview": "// Package snappy implements the SNAPPY parquet compression codec.\npackage snappy\n\nimport (\n\t\"github.com/klauspost/compr"
},
{
"path": "compress/uncompressed/uncompressed.go",
"chars": 605,
"preview": "// Package uncompressed provides implementations of the compression codec\n// interfaces as pass-through without applying"
},
{
"path": "compress/zstd/zstd.go",
"chars": 2126,
"preview": "// Package zstd implements the ZSTD parquet compression codec.\npackage zstd\n\nimport (\n\t\"sync\"\n\n\t\"github.com/klauspost/co"
},
{
"path": "compress.go",
"chars": 2448,
"preview": "package parquet\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/segmentio/parquet-go/compress\"\n\t\"github.com/segmentio/parquet-go/compress"
},
{
"path": "config.go",
"chars": 27358,
"preview": "package parquet\n\nimport (\n\t\"fmt\"\n\t\"math\"\n\t\"runtime/debug\"\n\t\"strings\"\n\t\"sync\"\n\n\t\"github.com/segmentio/parquet-go/compress"
},
{
"path": "convert.go",
"chars": 30368,
"preview": "package parquet\n\nimport (\n\t\"encoding/binary\"\n\t\"encoding/hex\"\n\t\"fmt\"\n\t\"io\"\n\t\"math\"\n\t\"math/big\"\n\t\"strconv\"\n\t\"sync\"\n\t\"time\""
},
{
"path": "convert_test.go",
"chars": 28800,
"preview": "package parquet_test\n\nimport (\n\t\"reflect\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/segmentio/parquet-go\"\n\t\"github.com/segmentio/"
},
{
"path": "dedupe.go",
"chars": 2732,
"preview": "package parquet\n\n// DedupeRowReader constructs a row reader which drops duplicated consecutive\n// rows, according to the"
},
{
"path": "dedupe_test.go",
"chars": 2124,
"preview": "//go:build go1.18\n\npackage parquet_test\n\nimport (\n\t\"sort\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n\nfunc TestDed"
},
{
"path": "deprecated/int96.go",
"chars": 3767,
"preview": "package deprecated\n\nimport (\n\t\"math/big\"\n\t\"math/bits\"\n\t\"unsafe\"\n)\n\n// Int96 is an implementation of the deprecated INT96"
},
{
"path": "deprecated/int96_test.go",
"chars": 2384,
"preview": "package deprecated_test\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n)\n\nfunc TestInt96Less"
},
{
"path": "deprecated/parquet.go",
"chars": 3778,
"preview": "package deprecated\n\n// DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet.\n// ConvertedType is su"
},
{
"path": "dictionary.go",
"chars": 39949,
"preview": "package parquet\n\nimport (\n\t\"io\"\n\t\"math/bits\"\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n\t\"github.com/segme"
},
{
"path": "dictionary_amd64.go",
"chars": 5448,
"preview": "//go:build !purego\n\npackage parquet\n\nimport (\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n\t\"github"
},
{
"path": "dictionary_amd64.s",
"chars": 19421,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n#define errnoIndexOutOfBounds 1\n\n// func dictionaryBoundsInt32(dict []int32, "
},
{
"path": "dictionary_purego.go",
"chars": 4265,
"preview": "//go:build purego || !amd64\n\npackage parquet\n\nimport (\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/sparse\"\n)\n\nfunc (d *"
},
{
"path": "dictionary_test.go",
"chars": 6621,
"preview": "package parquet_test\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n\nva"
},
{
"path": "encoding/bitpacked/bitpacked.go",
"chars": 2350,
"preview": "package bitpacked\n\nimport (\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/format\"\n)\n\ntyp"
},
{
"path": "encoding/bitpacked/bitpacked_test.go",
"chars": 271,
"preview": "//go:build go1.18\n// +build go1.18\n\npackage bitpacked_test\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/encod"
},
{
"path": "encoding/bytestreamsplit/bytestreamsplit.go",
"chars": 1689,
"preview": "package bytestreamsplit\n\nimport (\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/format\"\n"
},
{
"path": "encoding/bytestreamsplit/bytestreamsplit_amd64.go",
"chars": 673,
"preview": "//go:build !purego\n\npackage bytestreamsplit\n\nimport (\n\t\"golang.org/x/sys/cpu\"\n)\n\nvar encodeFloatHasAVX512 = cpu.X86.HasA"
},
{
"path": "encoding/bytestreamsplit/bytestreamsplit_amd64.s",
"chars": 9209,
"preview": " //go:build !purego\n\n#include \"textflag.h\"\n\n// This file contains optimizations of the BYTE_STREAM_SPLIT encoding using "
},
{
"path": "encoding/bytestreamsplit/bytestreamsplit_purego.go",
"chars": 1689,
"preview": "//go:build purego || !amd64\n\npackage bytestreamsplit\n\nimport \"github.com/segmentio/parquet-go/internal/unsafecast\"\n\nfunc"
},
{
"path": "encoding/bytestreamsplit/bytestreamsplit_test.go",
"chars": 635,
"preview": "//go:build go1.18\n// +build go1.18\n\npackage bytestreamsplit_test\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go"
},
{
"path": "encoding/delta/binary_packed.go",
"chars": 12862,
"preview": "package delta\n\nimport (\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"io\"\n\t\"math\"\n\t\"math/bits\"\n\n\t\"github.com/segmentio/parquet-go/encoding"
},
{
"path": "encoding/delta/binary_packed_amd64.go",
"chars": 6949,
"preview": "//go:build !purego\n\npackage delta\n\nimport (\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n\t\"golang.org/x/sys/cp"
},
{
"path": "encoding/delta/binary_packed_amd64.s",
"chars": 23508,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n#define blockSize 128\n#define numMiniBlocks 4\n#define miniBlockSize 32\n\n// --"
},
{
"path": "encoding/delta/binary_packed_amd64_test.go",
"chars": 2919,
"preview": "//go:build amd64 && !purego\n\npackage delta\n\nimport (\n\t\"testing\"\n\n\t\"golang.org/x/sys/cpu\"\n)\n\nfunc requireAVX2(t testing.T"
},
{
"path": "encoding/delta/binary_packed_purego.go",
"chars": 2347,
"preview": "//go:build purego || !amd64\n\npackage delta\n\nimport (\n\t\"encoding/binary\"\n)\n\nfunc encodeMiniBlockInt32(dst []byte, src *[m"
},
{
"path": "encoding/delta/binary_packed_test.go",
"chars": 9478,
"preview": "package delta\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"math/bits\"\n\t\"testing\"\n)\n\nfunc maxLen32(miniBlock []int32) (maxLen int) {\n\tfor "
},
{
"path": "encoding/delta/byte_array.go",
"chars": 5424,
"preview": "package delta\n\nimport (\n\t\"bytes\"\n\t\"sort\"\n\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/"
},
{
"path": "encoding/delta/byte_array_amd64.go",
"chars": 3773,
"preview": "//go:build !purego\n\npackage delta\n\nimport (\n\t\"golang.org/x/sys/cpu\"\n)\n\n//go:noescape\nfunc validatePrefixAndSuffixLengthV"
},
{
"path": "encoding/delta/byte_array_amd64.s",
"chars": 5306,
"preview": "//go:build !purego\n\n#include \"funcdata.h\"\n#include \"textflag.h\"\n\n// func validatePrefixAndSuffixLengthValuesAVX2(prefix,"
},
{
"path": "encoding/delta/byte_array_purego.go",
"chars": 1542,
"preview": "//go:build purego || !amd64\n\npackage delta\n\nfunc decodeByteArray(dst, src []byte, prefix, suffix []int32, offsets []uint"
},
{
"path": "encoding/delta/byte_array_test.go",
"chars": 2414,
"preview": "package delta\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"testing\"\n)\n\nfunc TestLinearSearchPrefixLength(t *testing.T) {\n\ttestSearchPrefi"
},
{
"path": "encoding/delta/delta.go",
"chars": 2277,
"preview": "package delta\n\nimport (\n\t\"fmt\"\n\t\"sync\"\n\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n)\n\ntype int32Buffer struc"
},
{
"path": "encoding/delta/delta_amd64.go",
"chars": 186,
"preview": "//go:build !purego\n\npackage delta\n\nconst (\n\tpadding = 64\n)\n\nfunc findNegativeLength(lengths []int32) int {\n\tfor _, n := "
},
{
"path": "encoding/delta/delta_amd64.s",
"chars": 339,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\nGLOBL ·rotateLeft32(SB), RODATA|NOPTR, $32\nDATA ·rotateLeft32+0(SB)/4, $7\nDAT"
},
{
"path": "encoding/delta/delta_test.go",
"chars": 1307,
"preview": "//go:build go1.18\n// +build go1.18\n\npackage delta_test\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/en"
},
{
"path": "encoding/delta/length_byte_array.go",
"chars": 1966,
"preview": "package delta\n\nimport (\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/format\"\n)\n\ntype Le"
},
{
"path": "encoding/delta/length_byte_array_amd64.go",
"chars": 231,
"preview": "//go:build !purego\n\npackage delta\n\n//go:noescape\nfunc encodeByteArrayLengths(lengths []int32, offsets []uint32)\n\n//go:no"
},
{
"path": "encoding/delta/length_byte_array_amd64.s",
"chars": 2917,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// func encodeByteArrayLengths(lengths []int32, offsets []uint32)\nTEXT ·encod"
},
{
"path": "encoding/delta/length_byte_array_purego.go",
"chars": 482,
"preview": "//go:build purego || !amd64\n\npackage delta\n\nfunc encodeByteArrayLengths(lengths []int32, offsets []uint32) {\n\tfor i := r"
},
{
"path": "encoding/delta/length_byte_array_test.go",
"chars": 883,
"preview": "package delta\n\nimport \"testing\"\n\nfunc TestDecodeByteArrayLengths(t *testing.T) {\n\tlengths := make([]int32, 999)\n\toffsets"
},
{
"path": "encoding/delta/testdata/fuzz/FuzzDeltaByteArray/2404234dd7e87c04303eb7e58208d5b2ccb04fb616c18f3254e2375c4bc327e3",
"chars": 70,
"preview": "go test fuzz v1\n[]byte(\"\\x80\\xf8\\xa9\\xaf\\x14\\xfc\\r\\rR1000\")\nint64(13)\n"
},
{
"path": "encoding/delta/testdata/fuzz/FuzzDeltaByteArray/4cf9c92e5a2096e3d6c42eaf9b1e31d2567854d33e06c8d2d7a8c46437345850",
"chars": 77,
"preview": "go test fuzz v1\n[]byte(\"\\xa1\\xa1\\xa1\\xa1\\xa1\\xa1\\xa1\\xa1\\xa100\")\nint64(-180)\n"
},
{
"path": "encoding/delta/testdata/fuzz/FuzzDeltaByteArray/9b210529f5e34e2dea5824929bf0d8242dc9c3165c0dce10bb376c50e21b38cc",
"chars": 828,
"preview": "go test fuzz v1\n[]byte(\"\\x800000\\xc9\\xc9\\xc9\\xc9\\xc9\\xc9\\xc9\\xc9\\xc90000000000000000000000000000000000000000000000000000"
},
{
"path": "encoding/delta/testdata/fuzz/FuzzDeltaByteArray/fbe137144bcda3a149c8ea109703f3242192c5480ea1e82dde0ea24e94f3afef",
"chars": 47,
"preview": "go test fuzz v1\n[]byte(\"\\x8000000\")\nint64(-97)\n"
},
{
"path": "encoding/encoding.go",
"chars": 2869,
"preview": "// Package encoding provides the generic APIs implemented by parquet encodings\n// in its sub-packages.\npackage encoding\n"
},
{
"path": "encoding/encoding_test.go",
"chars": 24024,
"preview": "package encoding_test\n\nimport (\n\t\"bytes\"\n\t\"io\"\n\t\"math\"\n\t\"math/bits\"\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/segmen"
},
{
"path": "encoding/fuzz/fuzz.go",
"chars": 3804,
"preview": "//go:build go1.18\n// +build go1.18\n\n// Package fuzz contains functions to help fuzz test parquet encodings.\npackage fuzz"
},
{
"path": "encoding/notsupported.go",
"chars": 6562,
"preview": "package encoding\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n\t\"github.com/segmentio/parque"
},
{
"path": "encoding/plain/dictionary.go",
"chars": 607,
"preview": "package plain\n\nimport (\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/format\"\n)\n\ntype Di"
},
{
"path": "encoding/plain/plain.go",
"chars": 7685,
"preview": "// Package plain implements the PLAIN parquet encoding.\n//\n// https://github.com/apache/parquet-format/blob/master/Encod"
},
{
"path": "encoding/plain/plain_test.go",
"chars": 504,
"preview": "package plain_test\n\nimport (\n\t\"bytes\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/encoding/plain\"\n)\n\nfunc TestAppendBo"
},
{
"path": "encoding/rle/dictionary.go",
"chars": 1251,
"preview": "package rle\n\nimport (\n\t\"math/bits\"\n\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/format"
},
{
"path": "encoding/rle/rle.go",
"chars": 14386,
"preview": "// Package rle implements the hybrid RLE/Bit-Packed encoding employed in\n// repetition and definition levels, dictionary"
},
{
"path": "encoding/rle/rle_amd64.go",
"chars": 1627,
"preview": "//go:build !purego\n\npackage rle\n\nimport (\n\t\"golang.org/x/sys/cpu\"\n)\n\nvar (\n\tencodeInt32IndexEqual8Contiguous func(words "
},
{
"path": "encoding/rle/rle_amd64.s",
"chars": 4235,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\nGLOBL bitMasks<>(SB), RODATA|NOPTR, $64\nDATA bitMasks<>+0(SB)/8, $0b00000001"
},
{
"path": "encoding/rle/rle_amd64_test.go",
"chars": 692,
"preview": "//go:build go1.18 && !purego && amd64\n// +build go1.18,!purego,amd64\n\npackage rle\n\nimport \"testing\"\n\nfunc TestEncodeInt3"
},
{
"path": "encoding/rle/rle_purego.go",
"chars": 572,
"preview": "//go:build purego || !amd64\n\npackage rle\n\nfunc encodeBytesBitpack(dst []byte, src []uint64, bitWidth uint) int {\n\treturn"
},
{
"path": "encoding/rle/rle_test.go",
"chars": 1365,
"preview": "//go:build go1.18\n// +build go1.18\n\npackage rle\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/encoding/fuzz\"\n\t"
},
{
"path": "encoding/rle/testdata/fuzz/FuzzEncodeBoolean/6be5e340694798c2e5b94c758f0262edd2edf8af5795d4c6c60f6e02643bbb96",
"chars": 46,
"preview": "go test fuzz v1\n[]byte(\"0\\x00\\x00\")\nint64(93)\n"
},
{
"path": "encoding/rle/testdata/fuzz/FuzzEncodeBoolean/9772b3f21a6f61810fe38d120bcc9da6d78540f22dc819a4201283608671fdf4",
"chars": 44,
"preview": "go test fuzz v1\n[]byte(\"00000001\")\nint64(0)\n"
},
{
"path": "encoding/rle/testdata/fuzz/FuzzEncodeInt32/06ba4bdb19de593e669c642987e270fe2488d4d58ecd712db136a3e011071253",
"chars": 40,
"preview": "go test fuzz v1\n[]byte(\"0000\")\nint64(0)\n"
},
{
"path": "encoding/rle/testdata/fuzz/FuzzEncodeLevels/0468684de48f926219bfc47be13ddf085b5a0ed9fbd9c40a005641b253e88d33",
"chars": 77,
"preview": "go test fuzz v1\n[]byte(\"\\xba\\xba\\xba\\xba0\\xba\\xba\\xba\\xba\\xba\\xba\")\nint64(0)\n"
},
{
"path": "encoding/test/test_go17.go",
"chars": 33,
"preview": "//go:build !go1.17\n\npackage test\n"
},
{
"path": "encoding/test/test_go18.go",
"chars": 2425,
"preview": "//go:build go1.18\n\npackage test\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/encoding\"\n)\n\nfunc EncodeI"
},
{
"path": "encoding/values.go",
"chars": 6750,
"preview": "package encoding\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n\t\"github.com/segmentio/parquet-go/inter"
},
{
"path": "encoding/values_test.go",
"chars": 180,
"preview": "package encoding_test\n\nimport (\n\t\"testing\"\n\t\"unsafe\"\n\n\t\"github.com/segmentio/parquet-go/encoding\"\n)\n\nfunc TestValuesSize"
},
{
"path": "encoding.go",
"chars": 4040,
"preview": "package parquet\n\nimport (\n\t\"math/bits\"\n\n\t\"github.com/segmentio/parquet-go/encoding\"\n\t\"github.com/segmentio/parquet-go/en"
},
{
"path": "errors.go",
"chars": 3194,
"preview": "package parquet\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n)\n\nvar (\n\t// ErrCorrupted is an error returned by the Err method of ColumnPag"
},
{
"path": "example_test.go",
"chars": 1285,
"preview": "package parquet_test\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"io/ioutil\"\n\t\"log\"\n\t\"os\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n\nfunc Exampl"
},
{
"path": "file.go",
"chars": 22237,
"preview": "package parquet\n\nimport (\n\t\"bufio\"\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"hash/crc32\"\n\t\"io\"\n\t\"sort\"\n\t\"strings\"\n\t\"sync\"\n\n\t\"github.co"
},
{
"path": "file_test.go",
"chars": 2946,
"preview": "package parquet_test\n\nimport (\n\t\"io\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n"
},
{
"path": "filter.go",
"chars": 1751,
"preview": "package parquet\n\n// FilterRowReader constructs a RowReader which exposes rows from reader for\n// which the predicate has"
},
{
"path": "filter_test.go",
"chars": 1263,
"preview": "package parquet_test\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go\"\n)\n\nfunc TestFilterRowReader(t *testing.T) "
},
{
"path": "format/parquet.go",
"chars": 35241,
"preview": "package format\n\nimport (\n\t\"fmt\"\n\n\t\"github.com/segmentio/parquet-go/deprecated\"\n)\n\n// Types supported by Parquet. These t"
},
{
"path": "format/parquet_test.go",
"chars": 739,
"preview": "package format_test\n\nimport (\n\t\"reflect\"\n\t\"testing\"\n\n\t\"github.com/segmentio/encoding/thrift\"\n\t\"github.com/segmentio/parq"
},
{
"path": "go.mod",
"chars": 467,
"preview": "module github.com/segmentio/parquet-go\n\ngo 1.19\n\nrequire (\n\tgithub.com/andybalholm/brotli v1.0.3\n\tgithub.com/google/uuid"
},
{
"path": "go.sum",
"chars": 2435,
"preview": "github.com/andybalholm/brotli v1.0.3 h1:fpcw+r1N1h0Poc1F/pHbW40cUm/lMEQslZtCkBQ0UnM=\ngithub.com/andybalholm/brotli v1.0."
},
{
"path": "hashprobe/aeshash/aeshash.go",
"chars": 852,
"preview": "// Package aeshash implements hashing functions derived from the Go runtime's\n// internal hashing based on the support o"
},
{
"path": "hashprobe/aeshash/aeshash_amd64.go",
"chars": 922,
"preview": "//go:build !purego\n\npackage aeshash\n\nimport (\n\t\"github.com/segmentio/parquet-go/sparse\"\n\t\"golang.org/x/sys/cpu\"\n)\n\n// En"
},
{
"path": "hashprobe/aeshash/aeshash_amd64.s",
"chars": 3373,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// func Hash32(value uint32, seed uintptr) uintptr\nTEXT ·Hash32(SB), NOSPLIT,"
},
{
"path": "hashprobe/aeshash/aeshash_purego.go",
"chars": 857,
"preview": "//go:build purego || !amd64\n\npackage aeshash\n\nimport \"github.com/segmentio/parquet-go/sparse\"\n\n// Enabled always returns"
},
{
"path": "hashprobe/aeshash/aeshash_test.go",
"chars": 3459,
"preview": "package aeshash\n\nimport (\n\t\"encoding/binary\"\n\t\"testing\"\n\t\"time\"\n\t\"unsafe\"\n)\n\n//go:noescape\n//go:linkname runtime_memhash"
},
{
"path": "hashprobe/hashprobe.go",
"chars": 18421,
"preview": "// Package hashprobe provides implementations of probing tables for various\n// data types.\n//\n// Probing tables are spec"
},
{
"path": "hashprobe/hashprobe_amd64.go",
"chars": 1383,
"preview": "//go:build !purego\n\npackage hashprobe\n\nimport (\n\t\"github.com/segmentio/parquet-go/sparse\"\n\t\"golang.org/x/sys/cpu\"\n)\n\n//g"
},
{
"path": "hashprobe/hashprobe_amd64.s",
"chars": 4965,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// This version of the probing algorithm for 32 bit keys takes advantage of\n/"
},
{
"path": "hashprobe/hashprobe_purego.go",
"chars": 679,
"preview": "//go:build purego || !amd64\n\npackage hashprobe\n\nimport (\n\t\"github.com/segmentio/parquet-go/sparse\"\n)\n\nfunc multiProbe32("
},
{
"path": "hashprobe/hashprobe_test.go",
"chars": 8701,
"preview": "package hashprobe\n\nimport (\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n\t\"unsafe\"\n)\n\nfunc TestTable32Group"
},
{
"path": "hashprobe/wyhash/wyhash.go",
"chars": 1349,
"preview": "// Package wyhash implements a hashing algorithm derived from the Go runtime's\n// internal hashing fallback, which uses "
},
{
"path": "hashprobe/wyhash/wyhash_amd64.go",
"chars": 386,
"preview": "//go:build !purego\n\npackage wyhash\n\nimport \"github.com/segmentio/parquet-go/sparse\"\n\n//go:noescape\nfunc MultiHashUint32A"
},
{
"path": "hashprobe/wyhash/wyhash_amd64.s",
"chars": 2147,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n#define m1 0xa0761d6478bd642f\n#define m2 0xe7037ed1a0b428db\n#define m3 0x8ebc"
},
{
"path": "hashprobe/wyhash/wyhash_purego.go",
"chars": 582,
"preview": "//go:build purego || !amd64\n\npackage wyhash\n\nimport \"github.com/segmentio/parquet-go/sparse\"\n\nfunc MultiHashUint32Array("
},
{
"path": "hashprobe/wyhash/wyhash_test.go",
"chars": 3354,
"preview": "package wyhash\n\nimport (\n\t\"encoding/binary\"\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n)\n\nfunc TestHash32(t *testing.T) {\n\tif h := "
},
{
"path": "internal/bitpack/bitpack.go",
"chars": 278,
"preview": "// Package bitpack implements efficient bit packing and unpacking routines for\n// integers of various bit widths.\npackag"
},
{
"path": "internal/bitpack/masks_int32_amd64.s",
"chars": 42092,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// --------------------------------------------------------------------------"
},
{
"path": "internal/bitpack/pack.go",
"chars": 2041,
"preview": "package bitpack\n\nimport (\n\t\"encoding/binary\"\n)\n\n// PackInt32 packs values from src to dst, each value is packed into the"
},
{
"path": "internal/bitpack/unpack.go",
"chars": 1070,
"preview": "package bitpack\n\n// PaddingInt32 is the padding expected to exist after the end of input buffers\n// for the UnpackInt32 "
},
{
"path": "internal/bitpack/unpack_int32_amd64.go",
"chars": 929,
"preview": "//go:build !purego\n\npackage bitpack\n\nimport (\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n\t\"golang.org/x/sys/"
},
{
"path": "internal/bitpack/unpack_int32_amd64.s",
"chars": 9262,
"preview": "//go:build !purego\n\n#include \"funcdata.h\"\n#include \"textflag.h\"\n\n// func unpackInt32Default(dst []int32, src []byte, bit"
},
{
"path": "internal/bitpack/unpack_int32_purego.go",
"chars": 508,
"preview": "//go:build purego || !amd64\n\npackage bitpack\n\nimport (\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n)\n\nfunc un"
},
{
"path": "internal/bitpack/unpack_int64_amd64.go",
"chars": 591,
"preview": "//go:build !purego\n\npackage bitpack\n\nimport (\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n\t\"golang.org/x/sys/"
},
{
"path": "internal/bitpack/unpack_int64_amd64.s",
"chars": 5411,
"preview": "//go:build !purego\n\n#include \"funcdata.h\"\n#include \"textflag.h\"\n\n// func unpackInt64Default(dst []int64, src []uint32, b"
},
{
"path": "internal/bitpack/unpack_int64_purego.go",
"chars": 615,
"preview": "//go:build purego || !amd64\n\npackage bitpack\n\nimport \"github.com/segmentio/parquet-go/internal/unsafecast\"\n\nfunc unpackI"
},
{
"path": "internal/bitpack/unpack_test.go",
"chars": 2706,
"preview": "package bitpack_test\n\nimport (\n\t\"fmt\"\n\t\"math/rand\"\n\t\"reflect\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/internal/bit"
},
{
"path": "internal/bytealg/broadcast_amd64.go",
"chars": 302,
"preview": "//go:build !purego\n\npackage bytealg\n\n//go:noescape\nfunc broadcastAVX2(dst []byte, src byte)\n\n// Broadcast writes the src"
},
{
"path": "internal/bytealg/broadcast_amd64.s",
"chars": 821,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// func broadcastAVX2(dst []byte, src byte)\nTEXT ·broadcastAVX2(SB), NOSPLIT,"
},
{
"path": "internal/bytealg/broadcast_purego.go",
"chars": 127,
"preview": "//go:build purego || !amd64\n\npackage bytealg\n\nfunc Broadcast(dst []byte, src byte) {\n\tfor i := range dst {\n\t\tdst[i] = sr"
},
{
"path": "internal/bytealg/broadcast_test.go",
"chars": 780,
"preview": "package bytealg_test\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/internal/bytealg\"\n)\n\nfunc TestBroadc"
},
{
"path": "internal/bytealg/bytealg.go",
"chars": 91,
"preview": "// Package bytealg contains optimized algorithms operating on byte slices.\npackage bytealg\n"
},
{
"path": "internal/bytealg/bytealg_amd64.go",
"chars": 429,
"preview": "//go:build !purego\n\npackage bytealg\n\nimport \"golang.org/x/sys/cpu\"\n\nvar (\n\thasAVX2 = cpu.X86.HasAVX2\n\t// These use AVX-5"
},
{
"path": "internal/bytealg/bytealg_test.go",
"chars": 385,
"preview": "package bytealg_test\n\nimport (\n\t\"fmt\"\n\t\"testing\"\n)\n\nvar benchmarkBufferSizes = [...]int{\n\t4 * 1024,\n\t256 * 1024,\n\t2048 *"
},
{
"path": "internal/bytealg/count_amd64.go",
"chars": 1015,
"preview": "//go:build !purego\n\npackage bytealg\n\n// This function is similar to using the standard bytes.Count function with a\n// on"
},
{
"path": "internal/bytealg/count_amd64.s",
"chars": 1729,
"preview": "//go:build !purego\n\n#include \"textflag.h\"\n\n// func Count(data []byte, value byte) int\nTEXT ·Count(SB), NOSPLIT, $0-40\n "
},
{
"path": "internal/bytealg/count_purego.go",
"chars": 147,
"preview": "//go:build purego || !amd64\n\npackage bytealg\n\nimport \"bytes\"\n\nfunc Count(data []byte, value byte) int {\n\treturn bytes.Co"
},
{
"path": "internal/bytealg/count_test.go",
"chars": 752,
"preview": "package bytealg_test\n\nimport (\n\t\"bytes\"\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/internal/bytealg\"\n\t\"github.com/seg"
},
{
"path": "internal/debug/debug.go",
"chars": 1814,
"preview": "package debug\n\nimport (\n\t\"encoding/hex\"\n\t\"fmt\"\n\t\"io\"\n\t\"log\"\n\t\"os\"\n\t\"strconv\"\n\t\"strings\"\n)\n\nfunc ReaderAt(reader io.Reade"
},
{
"path": "internal/debug/finalizer_off.go",
"chars": 140,
"preview": "//go:build debug\n\npackage debug\n\n// SetFinalizer is a no-op when the debug tag is specified.\nfunc SetFinalizer(interface"
},
{
"path": "internal/debug/finalizer_on.go",
"chars": 139,
"preview": "//go:build !debug\n\npackage debug\n\nimport \"runtime\"\n\nfunc SetFinalizer(obj, finalizer interface{}) { runtime.SetFinalizer"
},
{
"path": "internal/quick/quick.go",
"chars": 4571,
"preview": "package quick\n\nimport (\n\t\"fmt\"\n\t\"math\"\n\t\"math/rand\"\n\t\"reflect\"\n\t\"strings\"\n\t\"time\"\n)\n\nvar DefaultConfig = Config{\n\tSizes:"
},
{
"path": "internal/unsafecast/unsafecast_go17.go",
"chars": 3728,
"preview": "//go:build !go1.18\n\npackage unsafecast\n\nimport (\n\t\"reflect\"\n\t\"unsafe\"\n)\n\nfunc AddressOfBytes(data []byte) *byte {\n\tretur"
},
{
"path": "internal/unsafecast/unsafecast_go18.go",
"chars": 6427,
"preview": "//go:build go1.18\n\n// Package unsafecast exposes functions to bypass the Go type system and perform\n// conversions betwe"
},
{
"path": "internal/unsafecast/unsafecast_go18_test.go",
"chars": 956,
"preview": "//go:build go1.18\n\npackage unsafecast_test\n\nimport (\n\t\"testing\"\n\n\t\"github.com/segmentio/parquet-go/internal/unsafecast\"\n"
},
{
"path": "level.go",
"chars": 591,
"preview": "package parquet\n\nimport \"github.com/segmentio/parquet-go/internal/bytealg\"\n\nfunc countLevelsEqual(levels []byte, value b"
},
{
"path": "limits.go",
"chars": 1626,
"preview": "package parquet\n\nimport (\n\t\"fmt\"\n\t\"math\"\n)\n\nconst (\n\t// MaxColumnDepth is the maximum column depth supported by this pac"
},
{
"path": "merge.go",
"chars": 6968,
"preview": "package parquet\n\nimport (\n\t\"container/heap\"\n\t\"fmt\"\n\t\"io\"\n)\n\n// MergeRowGroups constructs a row group which is a merged v"
},
{
"path": "merge_test.go",
"chars": 15946,
"preview": "package parquet_test\n\nimport (\n\t\"bytes\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"math/rand\"\n\t\"sort\"\n\t\"testing\"\n\n\t\"github.com/segmentio/p"
}
]
// ... and 121 more files (download for full content)
About this extraction
This page contains the full source code of the segmentio/parquet-go GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 321 files (1.5 MB), approximately 487.2k tokens, and a symbol index with 4932 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.