Repository: google/riegeli Branch: master Commit: a0a8dac780d1 Files: 498 Total size: 5.0 MB Directory structure: gitextract_1atzokxc/ ├── .bazelrc ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── MODULE.bazel ├── README.md ├── configure ├── doc/ │ ├── index.md │ ├── record_writer_options.md │ └── riegeli_records_file_format.md ├── python/ │ ├── BUILD │ ├── MANIFEST.in │ ├── README.md │ ├── __init__.py │ ├── build_pip_package.sh │ ├── dummy_binary.py │ ├── riegeli/ │ │ ├── BUILD │ │ ├── BUILD.tpl │ │ ├── __init__.py │ │ ├── base/ │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── riegeli_error.py │ │ │ ├── utils.cc │ │ │ └── utils.h │ │ ├── bytes/ │ │ │ ├── BUILD │ │ │ ├── python_reader.cc │ │ │ ├── python_reader.h │ │ │ ├── python_writer.cc │ │ │ └── python_writer.h │ │ ├── py_extension.bzl │ │ ├── python_configure.bzl │ │ ├── records/ │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── examples/ │ │ │ │ ├── BUILD │ │ │ │ ├── __init__.py │ │ │ │ └── write_read_records.py │ │ │ ├── record_position.cc │ │ │ ├── record_position.h │ │ │ ├── record_reader.cc │ │ │ ├── record_writer.cc │ │ │ ├── records_metadata.proto │ │ │ ├── skipped_region.py │ │ │ └── tests/ │ │ │ ├── BUILD │ │ │ ├── __init__.py │ │ │ ├── records_test.proto │ │ │ └── records_test.py │ │ └── tensorflow/ │ │ ├── BUILD │ │ ├── __init__.py │ │ ├── kernel_tests/ │ │ │ ├── __init__.py │ │ │ └── riegeli_dataset_test.py │ │ └── ops/ │ │ ├── __init__.py │ │ └── riegeli_dataset_ops.py │ └── setup.py ├── riegeli/ │ ├── .gitignore │ ├── BUILD │ ├── base/ │ │ ├── BUILD │ │ ├── any.h │ │ ├── any_initializer.h │ │ ├── any_internal.h │ │ ├── arithmetic.h │ │ ├── assert.cc │ │ ├── assert.h │ │ ├── background_cleaning.cc │ │ ├── background_cleaning.h │ │ ├── binary_search.h │ │ ├── buffer.cc │ │ ├── buffer.h │ │ ├── buffering.h │ │ ├── byte_fill.cc │ │ ├── byte_fill.h │ │ ├── bytes_ref.h │ │ ├── c_string_ref.h │ │ ├── chain.cc │ │ ├── chain.h │ │ ├── chain_base.h │ │ ├── chain_details.h │ │ ├── closing_ptr.h │ │ ├── 
compact_string.cc │ │ ├── compact_string.h │ │ ├── compare.h │ │ ├── constexpr.h │ │ ├── cord_iterator_span.cc │ │ ├── cord_iterator_span.h │ │ ├── cord_utils.cc │ │ ├── cord_utils.h │ │ ├── debug.cc │ │ ├── debug.h │ │ ├── dependency.h │ │ ├── dependency_base.h │ │ ├── dependency_manager.h │ │ ├── errno_mapping.cc │ │ ├── errno_mapping.h │ │ ├── estimated_allocated_size.h │ │ ├── external_data.cc │ │ ├── external_data.h │ │ ├── external_ref.h │ │ ├── external_ref_base.h │ │ ├── external_ref_support.h │ │ ├── global.h │ │ ├── hybrid_direct_common.h │ │ ├── hybrid_direct_internal.h │ │ ├── hybrid_direct_map.h │ │ ├── hybrid_direct_set.h │ │ ├── initializer.h │ │ ├── initializer_internal.h │ │ ├── intrusive_shared_ptr.h │ │ ├── invoker.h │ │ ├── iterable.h │ │ ├── maker.h │ │ ├── memory_estimator.cc │ │ ├── memory_estimator.h │ │ ├── moving_dependency.h │ │ ├── new_aligned.h │ │ ├── null_safe_memcpy.h │ │ ├── object.cc │ │ ├── object.h │ │ ├── optional_compact_string.h │ │ ├── options_parser.cc │ │ ├── options_parser.h │ │ ├── ownership.h │ │ ├── parallelism.cc │ │ ├── parallelism.h │ │ ├── port.h │ │ ├── recycling_pool.h │ │ ├── ref_count.h │ │ ├── reset.h │ │ ├── shared_buffer.cc │ │ ├── shared_buffer.h │ │ ├── shared_ptr.h │ │ ├── sized_shared_buffer.cc │ │ ├── sized_shared_buffer.h │ │ ├── stable_dependency.h │ │ ├── status.cc │ │ ├── status.h │ │ ├── stream_utils.cc │ │ ├── stream_utils.h │ │ ├── string_ref.h │ │ ├── string_utils.cc │ │ ├── string_utils.h │ │ ├── temporary_storage.h │ │ ├── type_erased_ref.h │ │ ├── type_id.h │ │ ├── type_traits.h │ │ ├── types.h │ │ ├── unicode.cc │ │ ├── unicode.h │ │ └── uninitialized_vector.h │ ├── brotli/ │ │ ├── BUILD │ │ ├── brotli_allocator.cc │ │ ├── brotli_allocator.h │ │ ├── brotli_dictionary.cc │ │ ├── brotli_dictionary.h │ │ ├── brotli_reader.cc │ │ ├── brotli_reader.h │ │ ├── brotli_writer.cc │ │ └── brotli_writer.h │ ├── bytes/ │ │ ├── BUILD │ │ ├── array_backward_writer.cc │ │ ├── array_backward_writer.h │ │ ├── 
array_writer.cc │ │ ├── array_writer.h │ │ ├── backward_writer.cc │ │ ├── backward_writer.h │ │ ├── buffer_options.cc │ │ ├── buffer_options.h │ │ ├── buffered_reader.cc │ │ ├── buffered_reader.h │ │ ├── buffered_writer.cc │ │ ├── buffered_writer.h │ │ ├── cfile_handle.cc │ │ ├── cfile_handle.h │ │ ├── cfile_internal.cc │ │ ├── cfile_internal.h │ │ ├── cfile_internal_for_cc.h │ │ ├── cfile_reader.cc │ │ ├── cfile_reader.h │ │ ├── cfile_writer.cc │ │ ├── cfile_writer.h │ │ ├── chain_backward_writer.cc │ │ ├── chain_backward_writer.h │ │ ├── chain_reader.cc │ │ ├── chain_reader.h │ │ ├── chain_writer.cc │ │ ├── chain_writer.h │ │ ├── compact_string_writer.h │ │ ├── copy_all.cc │ │ ├── copy_all.h │ │ ├── cord_backward_writer.cc │ │ ├── cord_backward_writer.h │ │ ├── cord_reader.cc │ │ ├── cord_reader.h │ │ ├── cord_writer.cc │ │ ├── cord_writer.h │ │ ├── fd_handle.cc │ │ ├── fd_handle.h │ │ ├── fd_internal.cc │ │ ├── fd_internal.h │ │ ├── fd_internal_for_cc.h │ │ ├── fd_mmap_reader.cc │ │ ├── fd_mmap_reader.h │ │ ├── fd_reader.cc │ │ ├── fd_reader.h │ │ ├── fd_writer.cc │ │ ├── fd_writer.h │ │ ├── file_mode_string.cc │ │ ├── file_mode_string.h │ │ ├── iostream_internal.h │ │ ├── istream_reader.cc │ │ ├── istream_reader.h │ │ ├── joining_reader.cc │ │ ├── joining_reader.h │ │ ├── limiting_backward_writer.cc │ │ ├── limiting_backward_writer.h │ │ ├── limiting_reader.cc │ │ ├── limiting_reader.h │ │ ├── limiting_writer.cc │ │ ├── limiting_writer.h │ │ ├── null_backward_writer.cc │ │ ├── null_backward_writer.h │ │ ├── null_writer.cc │ │ ├── null_writer.h │ │ ├── ostream_writer.cc │ │ ├── ostream_writer.h │ │ ├── path_ref.h │ │ ├── position_shifting_backward_writer.cc │ │ ├── position_shifting_backward_writer.h │ │ ├── position_shifting_reader.cc │ │ ├── position_shifting_reader.h │ │ ├── position_shifting_writer.cc │ │ ├── position_shifting_writer.h │ │ ├── prefix_limiting_backward_writer.cc │ │ ├── prefix_limiting_backward_writer.h │ │ ├── prefix_limiting_reader.cc │ │ 
├── prefix_limiting_reader.h │ │ ├── prefix_limiting_writer.cc │ │ ├── prefix_limiting_writer.h │ │ ├── pullable_reader.cc │ │ ├── pullable_reader.h │ │ ├── pushable_backward_writer.cc │ │ ├── pushable_backward_writer.h │ │ ├── pushable_writer.cc │ │ ├── pushable_writer.h │ │ ├── read_all.cc │ │ ├── read_all.h │ │ ├── reader.cc │ │ ├── reader.h │ │ ├── reader_cfile.cc │ │ ├── reader_cfile.h │ │ ├── reader_factory.cc │ │ ├── reader_factory.h │ │ ├── reader_istream.cc │ │ ├── reader_istream.h │ │ ├── resizable_writer.cc │ │ ├── resizable_writer.h │ │ ├── restricted_chain_writer.cc │ │ ├── restricted_chain_writer.h │ │ ├── splitting_writer.cc │ │ ├── splitting_writer.h │ │ ├── std_io.cc │ │ ├── std_io.h │ │ ├── string_reader.cc │ │ ├── string_reader.h │ │ ├── string_writer.cc │ │ ├── string_writer.h │ │ ├── stringify.h │ │ ├── stringify_writer.h │ │ ├── vector_writer.h │ │ ├── wrapping_backward_writer.cc │ │ ├── wrapping_backward_writer.h │ │ ├── wrapping_reader.cc │ │ ├── wrapping_reader.h │ │ ├── wrapping_writer.cc │ │ ├── wrapping_writer.h │ │ ├── write.h │ │ ├── write_int_internal.cc │ │ ├── write_int_internal.h │ │ ├── writer.cc │ │ ├── writer.h │ │ ├── writer_cfile.cc │ │ ├── writer_cfile.h │ │ ├── writer_ostream.cc │ │ └── writer_ostream.h │ ├── bzip2/ │ │ ├── BUILD │ │ ├── bzip2_error.cc │ │ ├── bzip2_error.h │ │ ├── bzip2_reader.cc │ │ ├── bzip2_reader.h │ │ ├── bzip2_writer.cc │ │ └── bzip2_writer.h │ ├── chunk_encoding/ │ │ ├── BUILD │ │ ├── README.md │ │ ├── brotli_encoder_selection.cc │ │ ├── brotli_encoder_selection.h │ │ ├── chunk.cc │ │ ├── chunk.h │ │ ├── chunk_decoder.cc │ │ ├── chunk_decoder.h │ │ ├── chunk_encoder.cc │ │ ├── chunk_encoder.h │ │ ├── compressor.cc │ │ ├── compressor.h │ │ ├── compressor_options.cc │ │ ├── compressor_options.h │ │ ├── constants.h │ │ ├── decompressor.cc │ │ ├── decompressor.h │ │ ├── deferred_encoder.cc │ │ ├── deferred_encoder.h │ │ ├── field_projection.h │ │ ├── hash.cc │ │ ├── hash.h │ │ ├── simple_decoder.cc │ │ 
├── simple_decoder.h │ │ ├── simple_encoder.cc │ │ ├── simple_encoder.h │ │ ├── transpose_decoder.cc │ │ ├── transpose_decoder.h │ │ ├── transpose_encoder.cc │ │ ├── transpose_encoder.h │ │ └── transpose_internal.h │ ├── containers/ │ │ ├── BUILD │ │ ├── chunked_sorted_string_set.cc │ │ ├── chunked_sorted_string_set.h │ │ ├── linear_sorted_string_set.cc │ │ └── linear_sorted_string_set.h │ ├── csv/ │ │ ├── BUILD │ │ ├── csv_reader.cc │ │ ├── csv_reader.h │ │ ├── csv_record.cc │ │ ├── csv_record.h │ │ ├── csv_writer.cc │ │ └── csv_writer.h │ ├── digests/ │ │ ├── BUILD │ │ ├── adler32_digester.cc │ │ ├── adler32_digester.h │ │ ├── crc32_digester.cc │ │ ├── crc32_digester.h │ │ ├── crc32c_digester.h │ │ ├── digest_converter.h │ │ ├── digester_handle.cc │ │ ├── digester_handle.h │ │ ├── digesting_reader.cc │ │ ├── digesting_reader.h │ │ ├── digesting_writer.cc │ │ ├── digesting_writer.h │ │ ├── highwayhash_digester.cc │ │ ├── highwayhash_digester.h │ │ ├── md5_digester.h │ │ ├── openssl_digester.h │ │ ├── sha1_digester.h │ │ ├── sha256_digester.h │ │ ├── sha512_256_digester.h │ │ ├── sha512_digester.h │ │ └── wrapping_digester.h │ ├── endian/ │ │ ├── BUILD │ │ ├── endian_reading.h │ │ └── endian_writing.h │ ├── gcs/ │ │ ├── BUILD │ │ ├── gcs_internal.h │ │ ├── gcs_object.cc │ │ ├── gcs_object.h │ │ ├── gcs_reader.cc │ │ ├── gcs_reader.h │ │ ├── gcs_writer.cc │ │ └── gcs_writer.h │ ├── lines/ │ │ ├── BUILD │ │ ├── line_reading.cc │ │ ├── line_reading.h │ │ ├── line_writing.h │ │ ├── newline.h │ │ ├── text_reader.cc │ │ ├── text_reader.h │ │ ├── text_writer.cc │ │ └── text_writer.h │ ├── lz4/ │ │ ├── BUILD │ │ ├── lz4_dictionary.cc │ │ ├── lz4_dictionary.h │ │ ├── lz4_reader.cc │ │ ├── lz4_reader.h │ │ ├── lz4_writer.cc │ │ └── lz4_writer.h │ ├── messages/ │ │ ├── BUILD │ │ ├── context_projection.h │ │ ├── dynamic_field_handler.h │ │ ├── field_copier.h │ │ ├── field_handler_map.h │ │ ├── field_handlers.cc │ │ ├── field_handlers.h │ │ ├── map_entry_field.h │ │ ├── 
message_wire_format.h │ │ ├── parse_message.cc │ │ ├── parse_message.h │ │ ├── serialize_message.cc │ │ ├── serialize_message.h │ │ ├── serialized_message_assembler.cc │ │ ├── serialized_message_assembler.h │ │ ├── serialized_message_backward_writer.cc │ │ ├── serialized_message_backward_writer.h │ │ ├── serialized_message_internal.h │ │ ├── serialized_message_reader.cc │ │ ├── serialized_message_reader.h │ │ ├── serialized_message_reader_internal.h │ │ ├── serialized_message_writer.cc │ │ ├── serialized_message_writer.h │ │ ├── text_parse_message.cc │ │ ├── text_parse_message.h │ │ ├── text_print_message.cc │ │ └── text_print_message.h │ ├── ordered_varint/ │ │ ├── BUILD │ │ ├── ordered_varint_internal.h │ │ ├── ordered_varint_reading.cc │ │ ├── ordered_varint_reading.h │ │ ├── ordered_varint_writing.cc │ │ └── ordered_varint_writing.h │ ├── records/ │ │ ├── BUILD │ │ ├── README.md │ │ ├── block.h │ │ ├── chunk_reader.cc │ │ ├── chunk_reader.h │ │ ├── chunk_writer.cc │ │ ├── chunk_writer.h │ │ ├── record_position.cc │ │ ├── record_position.h │ │ ├── record_reader.cc │ │ ├── record_reader.h │ │ ├── record_writer.cc │ │ ├── record_writer.h │ │ ├── records_metadata.proto │ │ ├── skipped_region.cc │ │ ├── skipped_region.h │ │ └── tools/ │ │ ├── BUILD │ │ ├── describe_riegeli_file.cc │ │ ├── records_benchmark.cc │ │ ├── riegeli_summary.proto │ │ ├── tfrecord_recognizer.cc │ │ └── tfrecord_recognizer.h │ ├── snappy/ │ │ ├── BUILD │ │ ├── framed/ │ │ │ ├── BUILD │ │ │ ├── framed_snappy_reader.cc │ │ │ ├── framed_snappy_reader.h │ │ │ ├── framed_snappy_writer.cc │ │ │ └── framed_snappy_writer.h │ │ ├── hadoop/ │ │ │ ├── BUILD │ │ │ ├── hadoop_snappy_reader.cc │ │ │ ├── hadoop_snappy_reader.h │ │ │ ├── hadoop_snappy_writer.cc │ │ │ └── hadoop_snappy_writer.h │ │ ├── snappy_reader.cc │ │ ├── snappy_reader.h │ │ ├── snappy_streams.cc │ │ ├── snappy_streams.h │ │ ├── snappy_writer.cc │ │ └── snappy_writer.h │ ├── tensorflow/ │ │ ├── BUILD │ │ ├── io/ │ │ │ ├── BUILD │ │ │ ├── 
file_reader.cc │ │ │ ├── file_reader.h │ │ │ ├── file_writer.cc │ │ │ ├── file_writer.h │ │ │ └── tstring_writer.h │ │ ├── kernels/ │ │ │ └── riegeli_dataset_ops.cc │ │ └── ops/ │ │ └── riegeli_dataset_ops.cc │ ├── text/ │ │ ├── BUILD │ │ ├── ascii_align.h │ │ ├── concat.h │ │ ├── join.h │ │ ├── write_int.cc │ │ └── write_int.h │ ├── varint/ │ │ ├── BUILD │ │ ├── varint_internal.h │ │ ├── varint_reading.cc │ │ ├── varint_reading.h │ │ └── varint_writing.h │ ├── xz/ │ │ ├── BUILD │ │ ├── xz_error.cc │ │ ├── xz_error.h │ │ ├── xz_reader.cc │ │ ├── xz_reader.h │ │ ├── xz_writer.cc │ │ └── xz_writer.h │ ├── zlib/ │ │ ├── BUILD │ │ ├── zlib_dictionary.h │ │ ├── zlib_error.cc │ │ ├── zlib_error.h │ │ ├── zlib_reader.cc │ │ ├── zlib_reader.h │ │ ├── zlib_writer.cc │ │ └── zlib_writer.h │ └── zstd/ │ ├── BUILD │ ├── zstd_dictionary.cc │ ├── zstd_dictionary.h │ ├── zstd_reader.cc │ ├── zstd_reader.h │ ├── zstd_writer.cc │ └── zstd_writer.h └── tf_dependency/ ├── BUILD ├── BUILD.tpl └── tf_configure.bzl ================================================ FILE CONTENTS ================================================ ================================================ FILE: .bazelrc ================================================ # Enable Bzlmod by default. common --enable_bzlmod # Use C++17. build --cxxopt=-std=c++17 build --host_cxxopt=-std=c++17 # Make Python protos faster by backing them with C++ protos. # TODO: Reenable once protobuf releases # https://github.com/protocolbuffers/protobuf/pull/22633 # i.e. in version > 32.0. Or possibly switch to upb. # build --define=use_fast_cpp_protos=true # Options from ./configure # This is currently disabled because TensorFlow does not support bzlmod, # hence Riegeli/TensorFlow bindings are broken anyway. 
# import %workspace%/configure.bazelrc ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute We'd love to accept your patches and contributions to this project. There are just a few small guidelines you need to follow. ## Contributor License Agreement Contributions to this project must be accompanied by a Contributor License Agreement. You (or your employer) retain the copyright to your contribution, this simply gives us permission to use and redistribute your contributions as part of the project. Head over to <https://cla.developers.google.com/> to see your current agreements on file or to sign a new one. You generally only need to submit a CLA once, so if you've already submitted one (even if it was for a different project), you probably don't need to do it again. ## Code reviews All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests. ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MANIFEST.in ================================================ include .bazelrc include *.md include LICENSE include MANIFEST.in include WORKSPACE include configure recursive-include doc * recursive-include python * recursive-include riegeli * recursive-include third_party * ================================================ FILE: MODULE.bazel ================================================ module( name = "riegeli", repo_name = "com_google_riegeli", ) bazel_dep( name = "abseil-cpp", version = "20260107.0", repo_name = "com_google_absl", ) bazel_dep( name = "abseil-py", version = "2.1.0", repo_name = "absl_py", ) bazel_dep( name = "bazel_skylib", version = "1.7.1", ) bazel_dep( name = "boringssl", version = "0.0.0-20240530-2db0eb3", ) bazel_dep( name = "brotli", version = "1.1.0", repo_name = "org_brotli", ) bazel_dep( name = "bzip2", version = "1.0.8", ) bazel_dep( name = "highwayhash", version = "0.0.0-20240305-5ad3bf8.bcr.1", ) bazel_dep( name = "lz4", version = "1.9.4", ) bazel_dep( name = "platforms", version = "0.0.9", ) bazel_dep( name = "protobuf", version = "33.2", repo_name = "com_google_protobuf", ) bazel_dep( name = "rules_cc", version = "0.1.2", ) bazel_dep( name = "rules_python", version = "0.36.0", ) bazel_dep( name = "snappy", version = "1.2.0", ) bazel_dep( name = "xz", version = "5.4.5.bcr.1", ) bazel_dep( name = "zlib", version = "1.3.1.bcr.3", ) 
bazel_dep( name = "zstd", version = "1.5.6", repo_name = "net_zstd", ) bazel_dep( name = "google_cloud_cpp", version = "3.0.0-rc1", ) # Configure hermetic Python toolchain SUPPORTED_PYTHON_VERSIONS = [ "3.8", "3.9", "3.10", "3.11", "3.12", ] DEFAULT_PYTHON_VERSION = SUPPORTED_PYTHON_VERSIONS[-1] python = use_extension("@rules_python//python/extensions:python.bzl", "python") [ python.toolchain( is_default = version == DEFAULT_PYTHON_VERSION, python_version = version, ) for version in SUPPORTED_PYTHON_VERSIONS ] ================================================ FILE: README.md ================================================ # Riegeli *Riegeli/records* is a file format for storing a sequence of string records, typically serialized protocol buffers. It supports dense compression, fast decoding, seeking, detection and optional skipping of data corruption, filtering of proto message fields for even faster decoding, and parallel encoding. See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md). # Status Riegeli file format will only change in a backward compatible way (i.e. future readers will understand current files, but current readers might not understand files using future features). Riegeli C++ API might change in incompatible ways. ================================================ FILE: configure ================================================ #!/bin/bash # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
set -e PYTHON_BIN_PATH=`which python` if [[ $PYTHON_BIN_PATH ]] && $PYTHON_BIN_PATH -c "import tensorflow" &>/dev/null; then TF_CFLAGS=$($PYTHON_BIN_PATH -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') TF_LFLAGS=$($PYTHON_BIN_PATH -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') TF_HEADER_DIR=${TF_CFLAGS%% *} TF_HEADER_DIR=${TF_HEADER_DIR#-I} TF_SHARED_LIBRARY_DIR=${TF_LFLAGS%% *} TF_SHARED_LIBRARY_DIR=${TF_SHARED_LIBRARY_DIR#-L} TF_SHARED_LIBRARY_NAME=${TF_LFLAGS##* -l:} else TF_HEADER_DIR= TF_SHARED_LIBRARY_DIR= TF_SHARED_LIBRARY_NAME= fi { printf 'build --action_env PYTHON_BIN_PATH="%s"\n' "$PYTHON_BIN_PATH" printf 'build --action_env TF_HEADER_DIR="%s"\n' "$TF_HEADER_DIR" printf 'build --action_env TF_SHARED_LIBRARY_DIR="%s"\n' "$TF_SHARED_LIBRARY_DIR" printf 'build --action_env TF_SHARED_LIBRARY_NAME="%s"\n' "$TF_SHARED_LIBRARY_NAME" } >configure.bazelrc echo "Set up configure.bazelrc. Make sure to include it in your .bazelrc file." ================================================ FILE: doc/index.md ================================================ # Riegeli *Riegeli/records* is a file format for storing a sequence of string records, typically serialized protocol buffers. It supports dense compression, fast decoding, seeking, detection and optional skipping of data corruption, filtering of proto message fields for even faster decoding, and parallel encoding. * [Specification of Riegeli/records file format](riegeli_records_file_format.md). * [Specifying options for writing Riegeli/records files](record_writer_options.md). ================================================ FILE: doc/record_writer_options.md ================================================ # Specifying options for writing Riegeli/records files Options for writing Riegeli/records files can be specified as a string: ```data options ::= option? ("," option?)* option ::= "default" | "transpose" (":" ("true" | "false"))? 
| "uncompressed" | "brotli" (":" brotli_level)? | "zstd" (":" zstd_level)? | "snappy" (":" snappy_level)? | "window_log" ":" window_log | "brotli_encoder" ":" ("rbrotli_or_cbrotli" | "cbrotli" | "rbrotli") | "chunk_size" ":" chunk_size | "bucket_fraction" ":" bucket_fraction | "padding" (":" padding)? | "initial_padding" (":" padding)? | "final_padding" (":" padding)? | "parallelism" ":" parallelism brotli_level ::= integer in the range [0..11] (default 6) zstd_level ::= integer in the range [-131072..22] (default 3) snappy_level ::= integer in the range [1..2] (default 1) window_log ::= "auto" or integer in the range [10..31] chunk_size ::= "auto" or positive integer expressed as real with optional suffix [BkKMGTPE] bucket_fraction ::= real in the range [0..1] padding ::= positive integer expressed as real with optional suffix [BkKMGTPE] (default 64K) parallelism ::= non-negative integer ``` An empty string is the same as `default`. ## `transpose` If `true` (`transpose` is the same as `transpose:true`), records should be serialized proto messages (but nothing will break if they are not). A chunk of records will be processed in a way which allows for better compression. If `false`, a chunk of records will be stored in a simpler format, directly or with compression. Default: `false`. ## Compression algorithms ### `uncompressed` Changes compression algorithm to Uncompressed (turns compression off). ### `brotli` Changes compression algorithm to [Brotli](https://github.com/google/brotli). Sets compression level which tunes the tradeoff between compression density and compression speed (higher = better density but slower). `brotli_level` must be between 0 and 11. Default: `6`. This is the default compression algorithm. ### `zstd` Changes compression algorithm to [Zstd](https://facebook.github.io/zstd/). Sets compression level which tunes the tradeoff between compression density and compression speed (higher = better density but slower). 
`zstd_level` must be between -131072 and 22. Level 0 is currently equivalent to 3. Default: 3. ### `snappy` Changes compression algorithm to [Snappy](https://google.github.io/snappy/). `snappy_level` must be between 1 and 2. Default: 1. ## `window_log` Logarithm of the LZ77 sliding window size. This tunes the tradeoff between compression density and memory usage (higher = better density but more memory). Special value `auto` means to keep the default (`brotli`: 22, `zstd`: derived from compression level and chunk size). For `uncompressed` and `snappy`, `window_log` must be `auto`. For `brotli`, `window_log` must be `auto` or between 10 and 30. For `zstd`, `window_log` must be `auto` or between 10 and 30 in 32-bit build, 31 in 64-bit build. Default: `auto`. ## `chunk_size` Sets the desired uncompressed size of a chunk which groups messages to be transposed, compressed, and written together. A larger chunk size improves compression density; a smaller chunk size allows to read pieces of the file independently with finer granularity, and reduces memory usage of both writer and reader. Special value `auto` means to keep the default (compressed: 1M, uncompressed: 4k). Default: `auto`. ## `bucket_fraction` Sets the desired uncompressed size of a bucket which groups values of several fields of the given wire type to be compressed together, relative to the desired chunk size, on the scale between 0.0 (compress each field separately) to 1.0 (put all fields of the same wire type in the same bucket. This is meaningful if transpose and compression are enabled. A larger bucket size improves compression density; a smaller bucket size makes reading with projection faster, allowing to skip decompression of values of fields which are not included. Default 1.0. ## `padding` If `padding > 1`, padding is written at the beginning, when flushing, and at the end of the file, for the absolute position to reach a multiple of `padding`. Consequences if `padding` is a multiple of 64KB: 1. 
Physical concatenation of separately written files yields a valid file (setting metadata in subsequent files is wasteful but harmless). 2. Even if the existing file was corrupted or truncated, data appended to it will be recoverable. The cost is that up to `padding` bytes is wasted when padding is written. `padding` is a shortcut for `set_initial_padding` with `set_final_padding`. `padding` without the parameter assumes 64KB. Default: 1 (no padding). ## `initial_padding` If `initial_padding > 1`, padding is written at the beginning of the file, for the absolute position to reach a multiple of `initial_padding`. See `padding` for details. `initial_padding` without the parameter assumes 64KB. Default: 1 (no padding). ## `final_padding` If `final_padding > 1`, padding is written when flushing and at the end of the file, for the absolute position to reach a multiple of `final_padding`. See `padding` for details. `final_padding` without the parameter assumes 64KB. Default: 1 (no padding). ## `parallelism` Sets the maximum number of chunks being encoded in parallel in background. Larger parallelism can increase throughput, up to a point where it no longer matters; smaller parallelism reduces memory usage. If `parallelism > 0`, chunks are written in background and reporting writing errors is delayed. Default: 0. ================================================ FILE: doc/riegeli_records_file_format.md ================================================ # Riegeli/records file format specification ## Summary File contents are interpreted as a sequence of variable-sized *chunks,* where a chunk encodes some number of *records.* A record can be any byte sequence but Riegeli has special support for the common case where it is a serialized proto message. In order to support seeking and recovery after data corruption, the sequence of chunks is interrupted by a *block header* at every multiple of the block size which is 64 KiB. After the block header the interrupted chunk continues. 
A record can be identified by the position of the chunk beginning and the index of the record within the chunk. A record can also be identified by a number resembling a file position, defined as the sum of the chunk beginning and the record index. ## Conventions Numbers in block headers and chunk headers are encoded as unsigned Little-Endian integers. Hashes are 64-bit [HighwayHash](https://github.com/google/highwayhash) values with the key {0x2f696c6567656952, 0x0a7364726f636572, 0x2f696c6567656952, 0x0a7364726f636572} ('Riegeli/', 'records\n', 'Riegeli/', 'records\n'). ## Block header A block header allows to locate the chunk that the block header interrupts. Block headers can interrupt a chunk at arbitrary points, including in the middle of the chunk header. If a block header lies exactly between chunks, it is considered to interrupt the next chunk; this includes the situation at the beginning of the file. In this case the chunk formally begins at the beginning of the block, even though it contains no bytes before the block header. * Block header (24 bytes): * `header_hash` (8 bytes) — hash of the rest of the header (`previous_chunk` and `next_chunk`) * `previous_chunk` (8 bytes) — distance from the beginning of the chunk interrupted by this block header to the beginning of the block * `next_chunk` (8 bytes) — distance from the beginning of the block to the end of the chunk interrupted by this block header If `header_hash` does not match, then this block header is corrupted and must be ignored. Block headers can be skipped during sequential file reading, they are useful only for seeking and for error recovery. ## Chunk A chunk must not begin inside nor immediately after a block header. 
* Chunk header (40 bytes): * `header_hash` (8 bytes) — hash of the rest of the header (`data_size` up to and including `decoded_data_size`) * `data_size` (8 bytes) — size of `data` (excluding intervening block headers) * `data_hash` (8 bytes) — hash of `data` * `chunk_type` (1 byte) — determines how to interpret `data` * `num_records` (7 bytes) — number of records after decoding * `decoded_data_size` (8 bytes) — sum of record sizes after decoding * `data` (`data_size` bytes) — encoded records or other data * `padding` — ignored (usually filled with zeros by the encoder) If `header_hash` does not match, header contents cannot be trusted; if skipping over corruption is desired, a valid chunk should be located using block headers. If `data_hash` does not match, `data` is corrupted; if skipping over corruption is desired, the chunk must be ignored. The size of `padding` is the minimum size which satisfies the following constraints: * The chunk (including chunk header, `data`, `padding`, and intervening block headers) has at least as many bytes as `num_records`. * The chunk does not end inside nor immediately after a block header. If `num_records` is 0, `decoded_data_size` has a meaning depending on the chunk type. *Rationale:* *The presence of `padding` allows to assign unique numbers resembling file positions to records.* *`decoded_data_size` is stored in the chunk header, instead of being implied by or stored in `data`, to help decoders decide how many chunks to potentially read ahead.* ## Chunk data Some parts of chunk data are compressed. The compression format is generally specified as `compression_type` (byte): * 0 — none * 0x62 ('b') — [Brotli](https://github.com/google/brotli) * 0x7a ('z') — [Zstd](https://facebook.github.io/zstd/) * 0x73 ('s') — [Snappy](https://google.github.io/snappy/) Any compressed block is prefixed with its decompressed size (varint64) unless `compression_type` is 0. 
*Rationale:* *Knowing the decompressed size can make easier for the decoder to decompress data into a preallocated array.* ### File signature `chunk_type` is 0x73 ('s'). A file signature chunk must be present at the beginning of the file. It may also be present elsewhere, in which case it encodes no records and is ignored. `data_size`, `num_records`, and `decoded_data_size` must be 0. This makes the first 64 bytes of a Riegeli/records file fixed: ```data 83 af 70 d1 0d 88 4a 3f 00 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 91 ba c2 3c 92 87 e1 a9 00 00 00 00 00 00 00 00 e1 9f 13 c0 e9 b1 c3 72 73 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ``` ### File metadata `chunk_type` is 0x6d ('m'). A file metadata chunk provides information describing the records. Metadata are not necessary to read the records but might be helpful to interpret their contents. If present, metadata should be written immediately after file signature. The chunk is encoded like a transposed chunk with a single record containing a serialized `RecordsMetadata` proto message, except that `chunk_type` is different and `num_records` is 0. ### Padding chunk `chunk_type` is 0x70 ('p'). A padding chunk encodes no records and only occupies file space. `num_records` and `decoded_data_size` must be 0. `data` is ignored (usually filled with zeros by the encoder). This can be used for more efficient file concatenation (bringing the file offset modulo `kBlockSize` to 0 allows for physical concatenation of files without examining their contents), or for syncing to a file system which requires a particular file offset granularity in order for the sync to be effective. ### Simple chunk with records `chunk_type` is 0x72 ('r'). Simple chunks store record sizes and concatenated record contents in two buffers, possibly compressed. 
The format: * `compression_type` (byte) — compression type for sizes and values * `compressed_sizes_size` (varint64) — size of `compressed_sizes` * `compressed_sizes` (`compressed_sizes_size` bytes) - compressed buffer with record sizes * `compressed_values` (the rest of `data`) — compressed buffer with record values `compressed_sizes`, after decompression, contains `num_records` varint64s: the size of each record. `compressed_values`, after decompression, contains `decoded_data_size` bytes: concatenation of record values. ### Transposed chunk with records `chunk_type` is 0x74 ('t'). TODO: Document this. ## Properties of the file format * Data corruption anywhere is detected whenever the hash allows this, and it causes only a local data loss of up to a chunk (if chunk data are damaged) or block (if chunk header is damaged). * It is possible to open for append and write more records, even without reading the original file contents; the original file size must be taken into account though. * Seeking to the chunk closest to the given file position requires a seek + small read, then iterating through chunk headers in a block. ## Implementation notes The following formulas clarify how certain field values and positions can be computed. 
Constants for fixed sizes: ```c++ kBlockSize = 1 << 16; kBlockHeaderSize = 24; kUsableBlockSize = kBlockSize - kBlockHeaderSize; kChunkHeaderSize = 40; ``` Constraints for chunk boundary distances in a block header: ```c++ previous_chunk % kBlockSize < kUsableBlockSize && next_chunk > 0 && (next_chunk - 1) % kBlockSize >= kBlockHeaderSize ``` End position of a chunk which begins at `chunk_begin`: ```c++ NumOverheadBlocks(pos, size) = (size + (pos + kUsableBlockSize - 1) % kBlockSize) / kUsableBlockSize; AddWithOverhead(pos, size) = pos + size + NumOverheadBlocks(pos, size) * kBlockHeaderSize; // Equivalent implementation using unsigned arithmetic modulo 1 << 64: // RemainingInBlock(pos) = (-pos) % kBlockSize; RemainingInBlock(pos) = kBlockSize - 1 - (pos + kBlockSize - 1) % kBlockSize; SaturatingSub(a, b) = a > b ? a - b : 0; // 0 -> 0, 1..25 -> 25, 26 -> 26, ..., 64K -> 64K, 64K+1..64K+25 -> 64K+25 etc. RoundUpToPossibleChunkBoundary(pos) = pos + SaturatingSub(RemainingInBlock(pos), kUsableBlockSize - 1); chunk_end = max(AddWithOverhead(chunk_begin, kChunkHeaderSize + data_size), RoundUpToPossibleChunkBoundary(chunk_begin + num_records)); ``` Fields of a block header at `block_begin` which interrupts a chunk at `chunk_begin`: ```c++ prev_chunk = block_begin - chunk_begin; next_chunk = chunk_end - block_begin; ``` ================================================ FILE: python/BUILD ================================================ load("@rules_python//python:defs.bzl", "py_binary") package(default_visibility = ["//visibility:private"]) licenses(["notice"]) # These dependencies are gathered in a py_binary, instead of directly in # sh_binary data, so that bazel links __init__.py files to runfiles. 
py_binary( name = "dummy_binary", srcs = ["dummy_binary.py"], srcs_version = "PY3", deps = [ "//python/riegeli", "//python/riegeli/tensorflow:riegeli_dataset_ops", ], ) sh_binary( name = "build_pip_package", srcs = ["build_pip_package.sh"], data = [ "MANIFEST.in", "README.md", "setup.py", ":dummy_binary", ], ) ================================================ FILE: python/MANIFEST.in ================================================ recursive-include riegeli *.py ================================================ FILE: python/README.md ================================================ # Riegeli *Riegeli/records* is a file format for storing a sequence of string records, typically serialized protocol buffers. It supports dense compression, fast decoding, seeking, detection and optional skipping of data corruption, filtering of proto message fields for even faster decoding, and parallel encoding. See [documentation](https://github.com/google/riegeli/blob/master/doc/index.md). # Status Riegeli file format will only change in a backward compatible way (i.e. future readers will understand current files, but current readers might not understand files using future features). Riegeli C++ API might change in incompatible ways. ================================================ FILE: python/__init__.py ================================================ ================================================ FILE: python/build_pip_package.sh ================================================ #!/bin/bash # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. # Builds a pip package for riegeli. # # Usage (where DEST is a where to write the output, e.g. ~/riegeli-dist): # $ bazel build -c opt python:build_pip_package # $ bazel-bin/python/build_pip_package --dest DEST --sdist --bdist set -e function is_absolute { [[ "$1" = /* ]] || [[ "$1" =~ ^[a-zA-Z]:[/\\].* ]] } function real_path() { if is_absolute "$1"; then printf "%s" "$1" else printf "%s/%s" "$PWD" "${1#./}" fi } function build_sdist() { local dest=$1 python python/setup.py sdist --dist-dir "$dest" } function build_bdist() { local dest=$1 cd bazel-bin/python/build_pip_package.runfiles/com_google_riegeli/python python setup.py bdist_wheel --dist-dir "$dest" cd - } function main() { local dest= local sdist=false local bdist=false while [[ $# -gt 0 ]]; do if [[ $1 == --dest ]]; then shift dest=$(real_path "$1") elif [[ $1 == --sdist ]]; then sdist=true elif [[ $1 == --bdist ]]; then bdist=true else printf "Unknown flag: %s\n" "$1" >&2 exit 1 fi shift done if [[ -z $dest ]]; then printf "Missing required flag: --dest DIRECTORY\n" >&2 exit 1 fi if [[ $sdist != true ]] && [[ $bdist != true ]]; then printf "Nothing to do: missing --sdist or --bdist\n" >&2 exit 1 fi mkdir -p -- "$dest" if [[ $sdist = true ]]; then build_sdist "$dest" fi if [[ $bdist = true ]]; then build_bdist "$dest" fi } main "$@" ================================================ FILE: python/dummy_binary.py ================================================ ================================================ FILE: python/riegeli/BUILD ================================================ # Riegeli, file format for storing a sequence of records. 
load("@rules_python//python:defs.bzl", "py_library") package( default_visibility = ["//visibility:public"], features = ["header_modules"], ) licenses(["notice"]) exports_files(["LICENSE"]) py_library( name = "riegeli", srcs = ["__init__.py"], imports = [".."], deps = [ "//python/riegeli/base:riegeli_error", "//python/riegeli/records:record_position", "//python/riegeli/records:record_reader", "//python/riegeli/records:record_writer", "//python/riegeli/records:records_metadata_py_pb2", "//python/riegeli/records:skipped_region", ], ) ================================================ FILE: python/riegeli/BUILD.tpl ================================================ load("@bazel_tools//tools/python:toolchain.bzl", "py_runtime_pair") licenses(["restricted"]) package( default_visibility = ["//visibility:public"], features = ["header_modules"], ) toolchain( name = "toolchain", toolchain = ":py_runtime_pair", toolchain_type = "@bazel_tools//tools/python:toolchain_type", ) # To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib # See https://docs.python.org/3/extending/windows.html cc_import( name = "python_lib", interface_library = select({ ":windows": ":python_import_lib", # A placeholder for Unix platforms which makes --no_build happy. 
"//conditions:default": "not-existing.lib", }), system_provided = 1, ) cc_library( name = "python_headers", hdrs = [":python_include"], deps = select({ ":windows": [":python_lib"], "//conditions:default": [], }), includes = ["python_include"], ) cc_library( name = "numpy_headers", hdrs = [":numpy_include"], includes = ["numpy_include"], ) config_setting( name = "windows", values = {"cpu": "x64_windows"}, visibility = ["//visibility:public"], ) %{PYTHON_RUNTIME_PAIR} %{PYTHON_INCLUDE_GENRULE} %{NUMPY_INCLUDE_GENRULE} %{PYTHON_IMPORT_LIB_GENRULE} ================================================ FILE: python/riegeli/__init__.py ================================================ # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Writes or reads Riegeli/records files.""" from riegeli.base import riegeli_error from riegeli.records import record_position from riegeli.records import record_reader from riegeli.records import record_writer from riegeli.records import records_metadata_pb2 from riegeli.records import skipped_region __all__ = ( 'RiegeliError', 'CancelledError', 'UnknownError', 'InvalidArgumentError', 'DeadlineExceededError', 'NotFoundError', 'AlreadyExistsError', 'PermissionDeniedError', 'UnauthenticatedError', 'ResourceExhaustedError', 'FailedPreconditionError', 'AbortedError', 'OutOfRangeError', 'UnimplementedError', 'InternalError', 'UnavailableError', 'DataLossError', 'FlushType', 'RecordPosition', 'SkippedRegion', 'RecordsMetadata', 'set_record_type', 'RecordWriter', 'EXISTENCE_ONLY', 'get_record_type', 'RecordReader', ) # pylint: disable=invalid-name RiegeliError = riegeli_error.RiegeliError CancelledError = riegeli_error.CancelledError UnknownError = riegeli_error.UnknownError InvalidArgumentError = riegeli_error.InvalidArgumentError DeadlineExceededError = riegeli_error.DeadlineExceededError NotFoundError = riegeli_error.NotFoundError AlreadyExistsError = riegeli_error.AlreadyExistsError PermissionDeniedError = riegeli_error.PermissionDeniedError UnauthenticatedError = riegeli_error.UnauthenticatedError ResourceExhaustedError = riegeli_error.ResourceExhaustedError FailedPreconditionError = riegeli_error.FailedPreconditionError AbortedError = riegeli_error.AbortedError OutOfRangeError = riegeli_error.OutOfRangeError UnimplementedError = riegeli_error.UnimplementedError InternalError = riegeli_error.InternalError UnavailableError = riegeli_error.UnavailableError DataLossError = riegeli_error.DataLossError RecordPosition = record_position.RecordPosition SkippedRegion = skipped_region.SkippedRegion RecordsMetadata = records_metadata_pb2.RecordsMetadata FlushType = record_writer.FlushType set_record_type = record_writer.set_record_type RecordWriter = 
record_writer.RecordWriter EXISTENCE_ONLY = record_reader.EXISTENCE_ONLY get_record_type = record_reader.get_record_type RecordReader = record_reader.RecordReader ================================================ FILE: python/riegeli/base/BUILD ================================================ load("@rules_cc//cc:defs.bzl", "cc_library") load("@rules_python//python:defs.bzl", "py_library") package( default_visibility = ["//python/riegeli:__subpackages__"], features = ["header_modules"], ) licenses(["notice"]) cc_library( name = "utils", srcs = ["utils.cc"], hdrs = ["utils.h"], data = [":riegeli_error"], # Python module imported from C++. # utils.cc has #define before #include to influence what the included # files provide. features = ["-use_header_modules"], deps = [ "//riegeli/base:arithmetic", "//riegeli/base:assert", "//riegeli/base:chain", "//riegeli/base:compare", "//riegeli/base:types", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span", "@rules_python//python/cc:current_py_cc_headers", ], ) py_library( name = "riegeli_error", srcs = ["riegeli_error.py"], ) ================================================ FILE: python/riegeli/base/__init__.py ================================================ ================================================ FILE: python/riegeli/base/riegeli_error.py ================================================ # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. __all__ = ( 'RiegeliError', 'CancelledError', 'UnknownError', 'InvalidArgumentError', 'DeadlineExceededError', 'NotFoundError', 'AlreadyExistsError', 'PermissionDeniedError', 'UnauthenticatedError', 'ResourceExhaustedError', 'FailedPreconditionError', 'AbortedError', 'OutOfRangeError', 'UnimplementedError', 'InternalError', 'UnavailableError', 'DataLossError', ) class RiegeliError(Exception): """Base class of errors reported by Google APIs. Sometimes multiple error codes may apply. Services should return the most specific error code that applies. For example, prefer `OutOfRangeError` over `FailedPreconditionError` if both codes apply. Similarly prefer `NotFoundError` or `AlreadyExistsError` over `FailedPreconditionError`. Attributes: code: Error code classifying the error, matching C++ StatusCode. """ class CancelledError(RiegeliError): """The operation was cancelled, typically by the caller.""" code = 1 class UnknownError(RiegeliError): """Unknown error. For example, this error may be returned when a Status value received from another address space belongs to an error-space that is not known in this address space. Also errors raised by APIs that do not return enough error information may be converted to this error. """ code = 2 class InvalidArgumentError(RiegeliError): """The client specified an invalid argument. Note that this differs from `FailedPreconditionError`. `InvalidArgumentError` indicates arguments that are problematic regardless of the state of the system (e.g., a malformed file name). """ code = 3 class DeadlineExceededError(RiegeliError): """The deadline expired before the operation could complete. For operations that change the state of the system, this error may be returned even if the operation has completed successfully. or example, a successful response from a server could have been delayed long enough for the deadline to expire. 
""" code = 4 class NotFoundError(RiegeliError): """Some requested entity (e.g., file or directory) was not found. Note to server developers: if a request is denied for an entire class of users, such as gradual feature rollout or undocumented allowlist, `NotFoundError` may be used. If a request is denied for some users within a class of users, such as user-based access control, `PermissionDeniedError` must be used. """ code = 5 class AlreadyExistsError(RiegeliError): """The entity that a client attempted to create already exists.""" code = 6 class PermissionDeniedError(RiegeliError): """The caller does not have permission to execute the specified operation. `PermissionDeniedError` must not be used for rejections caused by exhausting some resource (use `ResourceExhaustedError` instead for those errors). `PermissionDeniedError` must not be used if the caller can not be identified (use `UnauthenticatedError` instead for those errors). This error code does not imply the request is valid or the requested entity exists or satisfies other pre-conditions. """ code = 7 class UnauthenticatedError(RiegeliError): """No valid authentication credentials for the operation.""" code = 16 class ResourceExhaustedError(RiegeliError): """Some resource has been exhausted. Perhaps a per-user quota, or perhaps the entire file system is out of space. """ code = 8 class FailedPreconditionError(RiegeliError): """Failed precondition. The operation was rejected because the system is not in a state required for the operation's execution. For example, the directory to be deleted is non-empty, an rmdir operation is applied to a non-directory, etc. A litmus test that may help a service implementor in deciding between `FailedPreconditionError`, `AbortedError`, and `UnavailableError`: (a) Use `UnavailableError` if the client can retry just the failing call. 
(b) Use `AbortedError` if the client should retry at a higher-level (e.g., when a client-specified test-and-set fails, indicating the client should restart a read-modify-write sequence). (c) Use `FailedPreconditionError` if the client should not retry until the system state has been explicitly fixed. E.g., if an "rmdir" fails because the directory is non-empty, `FailedPreconditionError` should be returned since the client should not retry unless the files are deleted from the directory. """ code = 9 class AbortedError(RiegeliError): """The operation was aborted. Typically due to a concurrency issue such as a sequencer check failure or transaction abort. See litmus test at `FailedPreconditionError` for deciding between `FailedPreconditionError`, `AbortedError`, and `UnavailableError`. """ code = 10 class OutOfRangeError(RiegeliError): """The operation was attempted past the valid range. E.g., seeking or reading past end-of-file. Unlike `InvalidArgumentError`, this error indicates a problem that may be fixed if the system state changes. For example, a 32-bit file system will generate `InvalidArgumentError` if asked to read at an offset that is not in the range [0,2^32-1], but it will generate `OutOfRangeError` if asked to read from an offset past the current file size. There is a fair bit of overlap between `FailedPreconditionError` and `OutOfRangeError`. We recommend using `OutOfRangeError` (the more specific error) when it applies so that callers who are iterating through a space can easily look for an `OutOfRangeError` error to detect when they are done. """ code = 11 class UnimplementedError(RiegeliError): """The operation is not implemented. Or is not supported/enabled in this service. """ code = 12 class InternalError(RiegeliError): """Internal errors. This means that some invariants expected by the underlying system have been broken. This error code is reserved for serious errors. 
""" code = 13 class UnavailableError(RiegeliError): """The service is currently unavailable. This is most likely a transient condition, which can be corrected by retrying with a backoff. See litmus test at `FailedPreconditionError` for deciding between `FailedPreconditionError`, `AbortedError`, and `UnavailableError`. """ code = 14 class DataLossError(RiegeliError): """Unrecoverable data loss or corruption.""" code = 15 ================================================ FILE: python/riegeli/base/utils.cc ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #define PY_SSIZE_T_CLEAN #include // clang-format: do not reorder the above include. #include "python/riegeli/base/utils.h" // clang-format: do not reorder the above include. 
#include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/chain.h" #include "riegeli/base/compare.h" #include "riegeli/base/types.h" namespace riegeli::python { Exception& Exception::operator=(const Exception& that) noexcept { PythonLock lock; Py_XINCREF(that.type_.get()); type_.reset(that.type_.get()); Py_XINCREF(that.value_.get()); value_.reset(that.value_.get()); Py_XINCREF(that.traceback_.get()); traceback_.reset(that.traceback_.get()); return *this; } Exception Exception::Fetch() { PythonLock::AssertHeld(); PyObject* type; PyObject* value; PyObject* traceback; PyErr_Fetch(&type, &value, &traceback); PyErr_NormalizeException(&type, &value, &traceback); return Exception(type, value, traceback); } PyObject* Exception::Restore() const& { PythonLock::AssertHeld(); Py_XINCREF(type_.get()); Py_XINCREF(value_.get()); Py_XINCREF(traceback_.get()); PyErr_Restore(type_.get(), value_.get(), traceback_.get()); return nullptr; } PyObject* Exception::Restore() && { PythonLock::AssertHeld(); PyErr_Restore(type_.release(), value_.release(), traceback_.release()); return nullptr; } std::string Exception::message() const { if (ok()) return "OK"; PythonLock lock; RIEGELI_ASSERT(PyExceptionClass_Check(type_.get())) << "Expected an exception class, not " << Py_TYPE(type_.get())->tp_name; std::string message = PyExceptionClass_Name(type_.get()); if (value_ == nullptr) return message; const PythonPtr str_result(PyObject_Str(value_.get())); if (ABSL_PREDICT_FALSE(str_result == nullptr)) { PyErr_Clear(); absl::StrAppend(&message, ": "); return message; } StrOrBytes str; if (ABSL_PREDICT_FALSE(!str.FromPython(str_result.get()))) { PyErr_Clear(); absl::StrAppend(&message, ": "); return message; } if 
(!absl::string_view(str).empty()) { absl::StrAppend(&message, ": ", absl::string_view(str)); } return message; } void SetRiegeliError(const absl::Status& status) { RIEGELI_ASSERT(!status.ok()) << "Failed precondition of SetRiegeliError(): status not failed"; PythonLock::AssertHeld(); PythonPtr message = StringToPython(status.message()); if (ABSL_PREDICT_FALSE(message == nullptr)) return; PyObject* type; switch (status.code()) { #define HANDLE_CODE(name) \ case absl::StatusCode::k##name: { \ static constexpr ImportedConstant k##name##Error( \ "riegeli.base.riegeli_error", #name "Error"); \ if (ABSL_PREDICT_FALSE(!k##name##Error.Verify())) return; \ type = k##name##Error.get(); \ } break // clang-format off HANDLE_CODE(Cancelled); default: HANDLE_CODE(Unknown); HANDLE_CODE(InvalidArgument); HANDLE_CODE(DeadlineExceeded); HANDLE_CODE(NotFound); HANDLE_CODE(AlreadyExists); HANDLE_CODE(PermissionDenied); HANDLE_CODE(Unauthenticated); HANDLE_CODE(ResourceExhausted); HANDLE_CODE(FailedPrecondition); HANDLE_CODE(Aborted); HANDLE_CODE(OutOfRange); HANDLE_CODE(Unimplemented); HANDLE_CODE(Internal); HANDLE_CODE(Unavailable); HANDLE_CODE(DataLoss); // clang-format on #undef HANDLE_CODE } Py_INCREF(type); PyErr_Restore(type, message.release(), nullptr); } namespace py_internal { namespace { // A linked list of all objects of type `StaticObject` which have `value_` // allocated, chained by their `next_` fields. This is used to free the objects // on Python interpreter shutdown. const StaticObject* all_static_objects = nullptr; } // namespace void FreeStaticObjectsImpl() { const StaticObject* static_object = std::exchange(all_static_objects, nullptr); while (static_object != nullptr) { static_object->value_ = nullptr; static_object = std::exchange(static_object->next_, nullptr); } } // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. 
extern "C" { static void FreeStaticObjects() { FreeStaticObjectsImpl(); } } // extern "C" void StaticObject::RegisterThis() const { PythonLock::AssertHeld(); if (all_static_objects == nullptr) { // This is the first registered `StaticObject` since `Py_Initialize()`. Py_AtExit(FreeStaticObjects); } next_ = std::exchange(all_static_objects, this); } bool ImportedCapsuleBase::ImportValue() const { // For some reason `PyImport_ImportModule()` is sometimes required before // `PyCapsule_Import()` for a module with a nested name. const size_t dot = absl::string_view(capsule_name_).rfind('.'); RIEGELI_ASSERT_NE(dot, absl::string_view::npos) << "Capsule name does not contain a dot: " << capsule_name_; RIEGELI_CHECK( PyImport_ImportModule(std::string(capsule_name_, dot).c_str()) != nullptr) << Exception::Fetch().message(); value_ = PyCapsule_Import(capsule_name_, false); return value_ != nullptr; } } // namespace py_internal bool Identifier::AllocateValue() const { value_ = StringToPython(name_).release(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) return false; PyUnicode_InternInPlace(&value_); RegisterThis(); return true; } bool ImportedConstant::AllocateValue() const { const PythonPtr module_name = StringToPython(module_name_); if (ABSL_PREDICT_FALSE(module_name == nullptr)) return false; const PythonPtr module(PyImport_Import(module_name.get())); if (ABSL_PREDICT_FALSE(module == nullptr)) return false; const PythonPtr attr_name = StringToPython(attr_name_); if (ABSL_PREDICT_FALSE(attr_name == nullptr)) return false; value_ = PyObject_GetAttr(module.get(), attr_name.get()); if (ABSL_PREDICT_FALSE(value_ == nullptr)) return false; RegisterThis(); return true; } bool ExportCapsule(PyObject* module, const char* capsule_name, const void* ptr) { PythonPtr capsule( PyCapsule_New(const_cast(ptr), capsule_name, nullptr)); if (ABSL_PREDICT_FALSE(capsule == nullptr)) return false; const size_t dot = absl::string_view(capsule_name).rfind('.'); RIEGELI_ASSERT_NE(dot, 
absl::string_view::npos) << "Capsule name does not contain a dot: " << capsule_name; RIEGELI_ASSERT(PyModule_Check(module)) << "Expected a module, not " << Py_TYPE(module)->tp_name; RIEGELI_ASSERT_EQ(absl::string_view(PyModule_GetName(module)), absl::string_view(capsule_name, dot)) << "Module name mismatch"; if (ABSL_PREDICT_FALSE(PyModule_AddObject(module, capsule_name + dot + 1, capsule.release()) < 0)) { return false; } return true; } MemoryView::~MemoryView() { if (object_ != nullptr && Py_REFCNT(object_.get()) > 1) { PyObject* value; PyObject* type; PyObject* traceback; PyErr_Fetch(&value, &type, &traceback); ReleaseInternal(); PyErr_Restore(value, type, traceback); } } PyObject* MemoryView::ToPython(absl::string_view value) { RIEGELI_ASSERT_EQ(object_, nullptr) << "Failed precondition of MemoryView::ToPython(): " "called more than once"; object_.reset(PyMemoryView_FromMemory(const_cast(value.data()), IntCast(value.size()), PyBUF_READ)); return object_.get(); } PyObject* MemoryView::MutableToPython(absl::Span value) { RIEGELI_ASSERT_EQ(object_, nullptr) << "Failed precondition of MemoryView::MutableToPython(): " "called more than once"; object_.reset(PyMemoryView_FromMemory( value.data(), IntCast(value.size()), PyBUF_WRITE)); return object_.get(); } bool MemoryView::Release() { bool release_ok = true; if (object_ != nullptr && Py_REFCNT(object_.get()) > 1) { release_ok = ReleaseInternal(); } object_.reset(); return release_ok; } inline bool MemoryView::ReleaseInternal() { static constexpr Identifier id_release("release"); const PythonPtr release_result( PyObject_CallMethodObjArgs(object_.get(), id_release.get(), nullptr)); return release_result != nullptr; } bool StrOrBytes::FromPython(PyObject* object ABSL_ATTRIBUTE_LIFETIME_BOUND) { RIEGELI_ASSERT_EQ(data_.data(), nullptr) << "Failed precondition of StrOrBytes::FromPython(): " "called more than once"; if (PyUnicode_Check(object)) { Py_ssize_t length; const char* data = PyUnicode_AsUTF8AndSize(object, 
&length); if (ABSL_PREDICT_FALSE(data == nullptr)) return false; data_ = absl::string_view(data, IntCast(length)); return true; } else if (ABSL_PREDICT_FALSE(!PyBytes_Check(object))) { PyErr_Format(PyExc_TypeError, "Expected str or bytes, not %s", Py_TYPE(object)->tp_name); return false; } data_ = absl::string_view(PyBytes_AS_STRING(object), IntCast(PyBytes_GET_SIZE(object))); return true; } PythonPtr ChainToPython(const Chain& value) { PythonPtr bytes( PyBytes_FromStringAndSize(nullptr, IntCast(value.size()))); if (ABSL_PREDICT_FALSE(bytes == nullptr)) return nullptr; value.CopyTo(PyBytes_AS_STRING(bytes.get())); return bytes; } std::optional ChainFromPython(PyObject* object) { Py_buffer buffer; if (ABSL_PREDICT_FALSE(PyObject_GetBuffer(object, &buffer, PyBUF_CONTIG_RO) < 0)) { return std::nullopt; } Chain result(absl::string_view(static_cast(buffer.buf), IntCast(buffer.len))); PyBuffer_Release(&buffer); return result; } PythonPtr SizeToPython(size_t value) { if (ABSL_PREDICT_FALSE(value > std::numeric_limits::max())) { PyErr_Format(PyExc_OverflowError, "Size out of range: %zu", value); return nullptr; } return PythonPtr( PyLong_FromUnsignedLongLong(IntCast(value))); } std::optional SizeFromPython(PyObject* object) { const PythonPtr index(PyNumber_Index(object)); if (ABSL_PREDICT_FALSE(index == nullptr)) return std::nullopt; RIEGELI_ASSERT(PyLong_Check(index.get())) << "PyNumber_Index() returned an unexpected type: " << Py_TYPE(index.get())->tp_name; unsigned long long index_value = PyLong_AsUnsignedLongLong(index.get()); if (ABSL_PREDICT_FALSE(index_value == static_cast(-1)) && PyErr_Occurred()) { return std::nullopt; } if (ABSL_PREDICT_FALSE(index_value > std::numeric_limits::max())) { PyErr_Format(PyExc_OverflowError, "Size out of range: %llu", index_value); return std::nullopt; } return IntCast(index_value); } PythonPtr PositionToPython(Position value) { if (ABSL_PREDICT_FALSE(value > std::numeric_limits::max())) { PyErr_Format(PyExc_OverflowError, "Position 
out of range: %ju", uintmax_t{value}); return nullptr; } return PythonPtr( PyLong_FromUnsignedLongLong(IntCast(value))); } std::optional PositionFromPython(PyObject* object) { const PythonPtr index(PyNumber_Index(object)); if (ABSL_PREDICT_FALSE(index == nullptr)) return std::nullopt; RIEGELI_ASSERT(PyLong_Check(index.get())) << "PyNumber_Index() returned an unexpected type: " << Py_TYPE(index.get())->tp_name; const unsigned long long index_value = PyLong_AsUnsignedLongLong(index.get()); if (ABSL_PREDICT_FALSE(index_value == static_cast(-1)) && PyErr_Occurred()) { return std::nullopt; } if (ABSL_PREDICT_FALSE(index_value > std::numeric_limits::max())) { PyErr_Format(PyExc_OverflowError, "Position out of range: %llu", index_value); return std::nullopt; } return IntCast(index_value); } PythonPtr PartialOrderingToPython(PartialOrdering ordering) { if (ordering == PartialOrdering::unordered) { return Py_INCREF(Py_None), PythonPtr(Py_None); } return PythonPtr(PyLong_FromLong(ordering < 0 ? -1 : ordering == 0 ? 0 : 1)); } std::optional PartialOrderingFromPython(PyObject* object) { if (object == Py_None) return PartialOrdering::unordered; const long long_value = PyLong_AsLong(object); if (ABSL_PREDICT_FALSE(long_value == -1) && PyErr_Occurred()) { return std::nullopt; } return riegeli::Compare(long_value, 0); } } // namespace riegeli::python ================================================ FILE: python/riegeli/base/utils.h ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. #ifndef PYTHON_RIEGELI_BASE_UTILS_H_ #define PYTHON_RIEGELI_BASE_UTILS_H_ // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #include // clang-format: do not reorder the above include. #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/chain.h" #include "riegeli/base/compare.h" #include "riegeli/base/types.h" namespace riegeli::python { // Ensures that Python GIL is locked. Reentrant. // // Same as `PyGILState_Ensure()` / `PyGILState_Release()`. class PythonLock { public: static void AssertHeld() { RIEGELI_ASSERT(PyGILState_Check()) << "Python GIL was assumed to be held"; } PythonLock() { gstate_ = PyGILState_Ensure(); } PythonLock(const PythonLock&) = delete; PythonLock& operator=(const PythonLock&) = delete; ~PythonLock() { PyGILState_Release(gstate_); } private: PyGILState_STATE gstate_; }; // Unlocks Python GIL, allowing non-Python threads to run. // // Same as `Py_BEGIN_ALLOW_THREADS` / `Py_END_ALLOW_THREADS`. class PythonUnlock { public: PythonUnlock() { PythonLock::AssertHeld(); tstate_ = PyEval_SaveThread(); } PythonUnlock(const PythonUnlock&) = delete; PythonUnlock& operator=(const PythonUnlock&) = delete; ~PythonUnlock() { PyEval_RestoreThread(tstate_); } private: PyThreadState* tstate_; }; // Apply a function with Python GIL unlocked, allowing non-Python threads to // run. // // Same as `Py_BEGIN_ALLOW_THREADS` / `Py_END_ALLOW_THREADS`. 
template std::invoke_result_t PythonUnlocked(Function&& f) { PythonUnlock unlock; return std::forward(f)(); } // Owned `PyObject` which assumes that Python GIL is held. struct Deleter { template void operator()(T* ptr) const { PythonLock::AssertHeld(); Py_DECREF(ptr); } }; using PythonPtr = std::unique_ptr; // Owned `PyObject` which does not assume that Python GIL is held. struct LockingDeleter { template void operator()(T* ptr) const { PythonLock lock; Py_DECREF(ptr); } }; using PythonPtrLocking = std::unique_ptr; // Allows a C++ object to be safely embedded in a Python object allocated with // `PyType_GenericAlloc()`. // // `PythonWrapped` is similar to `std::optional`, but: // * `PythonWrapped` is POD. // * `PythonWrapped` supports only a subset of `std::optional` API. // * `PythonWrapped` filled with zero bytes is valid and absent // (`PyType_GenericAlloc()` fills the Python object with zero bytes). // * `PythonWrapped` should be explicitly `reset()` in the implementation of // `tp_dealloc` (there is no C++ destructor). template class PythonWrapped { public: static_assert(alignof(T) <= alignof(max_align_t), "PythonWrapped does not support overaligned types"); template ABSL_ATTRIBUTE_REINITIALIZES void emplace(Args&&... 
args) { if (has_value_) { get()->~T(); } else { has_value_ = true; } new (storage_) T(std::forward(args)...); } ABSL_ATTRIBUTE_REINITIALIZES void reset() { if (has_value_) { get()->~T(); has_value_ = false; } } bool has_value() const { return has_value_; } T* get() ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(has_value_) << "Object uninitialized"; return std::launder(reinterpret_cast(storage_)); } const T* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(has_value_) << "Object uninitialized"; return std::launder(reinterpret_cast(storage_)); } T& operator*() ABSL_ATTRIBUTE_LIFETIME_BOUND { return *get(); } const T& operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return *get(); } T* operator->() ABSL_ATTRIBUTE_LIFETIME_BOUND { return get(); } const T* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return get(); } bool Verify() const { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(!has_value())) { PyErr_SetString(PyExc_ValueError, "Object uninitialized"); return false; } return true; } private: bool has_value_; alignas(T) char storage_[sizeof(T)]; }; // Represents an optional Python exception being raised. class Exception { public: // No exception. Exception() = default; Exception(const Exception& that) noexcept; Exception& operator=(const Exception& that) noexcept; Exception(Exception&& that) = default; Exception& operator=(Exception&& that) = default; // Fetches the active Python exception. static Exception Fetch(); // Restores the active Python exception. PyObject* Restore() const&; PyObject* Restore() &&; bool ok() const { return type_ == nullptr; } std::string message() const; // For implementing `tp_traverse` of objects containing `Exception`. int Traverse(visitproc visit, void* arg); private: // Steals references. 
explicit Exception(PyObject* type, PyObject* value, PyObject* traceback) : type_(type), value_(value), traceback_(traceback) {} PythonPtrLocking type_; PythonPtrLocking value_; PythonPtrLocking traceback_; }; // Translate a failed status to the active Python exception, a class extending // `RiegeliError`. void SetRiegeliError(const absl::Status& status); namespace py_internal { // Lazily initialized pointer to a Python object, persisting until interpreter // shutdown. class StaticObject { protected: mutable PyObject* value_ = nullptr; mutable const StaticObject* next_ = nullptr; // Register this object in a global list of static objects. This must be // called when `value_` is allocated. void RegisterThis() const; private: friend void FreeStaticObjectsImpl(); }; // Template parameter independent part of `ImportedCapsule`. class ImportedCapsuleBase { public: // Forces importing the value, returning `false` on failures (with Python // exception set). // // If `Verify()` returns `true`, `get()` does not die. bool Verify() const { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) return ImportValue(); return true; } protected: explicit constexpr ImportedCapsuleBase(const char* capsule_name) : capsule_name_(capsule_name) {} ~ImportedCapsuleBase() = default; bool ImportValue() const; mutable void* value_ = nullptr; private: const char* capsule_name_; }; } // namespace py_internal // Creates a Python string (type `str`) which persists until interpreter // shutdown. This is useful for attribute or method names in // `PyObject_GetAttr()` or `PyObject_CallMethodObjArgs()`. // // An instance of `Identifier` should be allocated statically: // ``` // static constexpr Identifier id_write("write"); // ``` // // Then `id_write.get()` is a borrowed reference to the Python object. 
class Identifier : public py_internal::StaticObject { public: explicit constexpr Identifier(absl::string_view name) : name_(name) {} // Forces allocating the value, returning `false` on failures (with Python // exception set). // // If `Verify()` returns `true`, `get()` does not die. bool Verify() const { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) return AllocateValue(); return true; } // Returns the value, allocating it on the first call. Dies on failure // (use `Verify()` to prevent this). PyObject* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) { RIEGELI_CHECK(AllocateValue()) << Exception::Fetch().message(); } return value_; } private: bool AllocateValue() const; absl::string_view name_; }; // Imports a Python module and gets its attribute, which persists until // interpreter shutdown. // // An instance of `ImportedConstant` should be allocated statically: // ``` // static constexpr ImportedConstant kRiegeliError( // "riegeli.base.riegeli_error", "RiegeliError"); // ``` // // Then `kRiegeliError.get()` is a borrowed reference to the Python object. class ImportedConstant : public py_internal::StaticObject { public: explicit constexpr ImportedConstant(absl::string_view module_name, absl::string_view attr_name) : module_name_(module_name), attr_name_(attr_name) {} // Forces importing the value, returning `false` on failures (with Python // exception set). // // If `Verify()` returns `true`, `get()` does not die. bool Verify() const { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) return AllocateValue(); return true; } // Returns the value, importing it on the first call. Dies on failure // (use `Verify()` to prevent this). 
PyObject* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) { RIEGELI_CHECK(AllocateValue()) << Exception::Fetch().message(); } return value_; } private: bool AllocateValue() const; absl::string_view module_name_; absl::string_view attr_name_; }; // Exports a Python capsule containing a C++ pointer, which should be valid // forever, by adding it to the given module. // // `capsule_name` must be "module_name.attr_name" with `module_name` // corresponding to `PyModule_GetName(module)`. // // Returns `false` on failure (with Python exception set). bool ExportCapsule(PyObject* module, const char* capsule_name, const void* ptr); // Imports a Python capsule and gets its stored pointer, which persists forever. // // `capsule_name must` be "module_name.attr_name". // // An instance of `ImportedCapsule` should be allocated statically: // ``` // static constexpr ImportedCapsule kRecordPositionApi( // "riegeli.records.record_position._CPPAPI"); // ``` // // Then `kRecordPositionApi.get()` is a pointer stored in the capsule. template class ImportedCapsule : public py_internal::ImportedCapsuleBase { public: explicit constexpr ImportedCapsule(const char* capsule_name) : ImportedCapsuleBase(capsule_name) {} // Returns the value, importing it on the first call. Dies on failure // (use `Verify()` to prevent this). const T* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(value_ == nullptr)) { RIEGELI_CHECK(ImportValue()) << Exception::Fetch().message(); } return static_cast(value_); } const T& operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return *get(); } const T* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return get(); } }; // Converts C++ `long` to a Python `int` object. // // Returns `nullptr` on failure (with Python exception set). 
inline PythonPtr IntToPython(long value) { return PythonPtr(PyLong_FromLong(value)); } // Converts C++ `absl::string_view` to a Python `bytes` object. // // Returns `nullptr` on failure (with Python exception set). inline PythonPtr BytesToPython(absl::string_view value) { return PythonPtr(PyBytes_FromStringAndSize( value.data(), IntCast(value.size()))); } // Converts C++ array of bytes to a Python `memoryview` object. // // Memory is shared. The C++ memory must be valid as long as the Python object // is needed. class MemoryView { public: MemoryView() = default; MemoryView(const MemoryView&) = delete; MemoryView& operator=(const MemoryView&) = delete; // Calls `Release()`, ignoring its result, without disturbing the Python // exception state. ~MemoryView(); // Creates and returns a read-only `memoryview` object. // // Returns `nullptr` on failure (with Python exception set). // // `ToPython()` or `MutableToPython()` must be called at most once for each // `MemoryView` object. PyObject* ToPython(absl::string_view value); // Creates and returns a mutable `memoryview` object. // // Returns `nullptr` on failure (with Python exception set). // // `ToPython()` or `MutableToPython()` must be called at most once for each // `MemoryView` object. PyObject* MutableToPython(absl::Span value); // If a reference to the `memoryview` has been stored elsewhere, calls // `memoryview.release()` to mark the `memoryview` as invalid. // // Returns `false` on failure (with Python exception set). bool Release(); private: bool ReleaseInternal(); PythonPtr object_; }; // Refers to internals of a Python `bytes`-like object, using the buffer // protocol. class BytesLike { public: BytesLike() noexcept { buffer_.obj = nullptr; } BytesLike(const BytesLike&) = delete; BytesLike& operator=(const BytesLike&) = delete; ~BytesLike() { PythonLock::AssertHeld(); if (buffer_.obj != nullptr) PyBuffer_Release(&buffer_); } // Converts from a Python object. 
// // Returns `false` on failure (with Python exception set). // // Must be called at most once for each `BytesLike` object. bool FromPython(PyObject* object) { RIEGELI_ASSERT_EQ(buffer_.obj, nullptr) << "Failed precondition of BytesLike::FromPython(): " "called more than once"; return PyObject_GetBuffer(object, &buffer_, PyBUF_CONTIG_RO) == 0; } // Returns the binary contents. /*implicit*/ operator absl::string_view() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return absl::string_view(static_cast(buffer_.buf), IntCast(buffer_.len)); } private: Py_buffer buffer_; }; // Converts C++ `absl::string_view` to a Python `str` object. Unicode is // converted from UTF-8. // // Returns `nullptr` on failure (with Python exception set). inline PythonPtr StringToPython(absl::string_view value) { return PythonPtr(PyUnicode_FromStringAndSize( value.data(), IntCast(value.size()))); } // Refers to internals of a Python object representing text. Valid Python // objects are `str` or `bytes`. Unicode is converted to UTF-8. class StrOrBytes { public: StrOrBytes() noexcept {} StrOrBytes(const StrOrBytes&) = delete; StrOrBytes& operator=(const StrOrBytes&) = delete; // Converts from a Python object. // // Returns `false` on failure (with Python exception set). // // Must be called at most once for each `StrOrBytes` object. bool FromPython(PyObject* object ABSL_ATTRIBUTE_LIFETIME_BOUND); // Returns the text contents. /*implicit*/ operator absl::string_view() const { return data_; } private: absl::string_view data_; }; // Converts C++ `Chain` to a Python `bytes` object. // // Returns `nullptr` on failure (with Python exception set). PythonPtr ChainToPython(const Chain& value); // Converts a Python `bytes`-like object to C++ `Chain`, using the buffer // protocol. // // Returns `std::nullopt` on failure (with Python exception set). std::optional ChainFromPython(PyObject* object); // Converts C++ `size_t` to a Python `int` object. // // Returns `nullptr` on failure (with Python exception set). 
PythonPtr SizeToPython(size_t value); // Converts a Python object to C++ `size_t`. Valid Python objects are the same // as for slicing: `int` or objects supporting `__index__()`. // // Returns `std::nullopt` on failure (with Python exception set). std::optional SizeFromPython(PyObject* object); // Converts C++ `Position` to a Python `int` object. // // Returns `nullptr` on failure (with Python exception set). PythonPtr PositionToPython(Position value); // Converts a Python object to C++ `Position`. Valid Python objects are the same // as for slicing: `int` or objects supporting `__index__()`. // // Returns `std::nullopt` on failure (with Python exception set). std::optional PositionFromPython(PyObject* object); // Converts C++ `PartialOrdering` to a Python `None` (for `unordered`) or `int` // object (-1 for `less`, 0 for `equivalent`, or 1 for `greater`). // // Returns `nullptr` on failure (with Python exception set). PythonPtr PartialOrderingToPython(PartialOrdering ordering); // Converts a Python object to C++ `PartialOrdering`. Valid Python objects are // `int` (compared with 0) or `None`. // // Returns `std::nullopt` on failure (with Python exception set). std::optional PartialOrderingFromPython(PyObject* object); // Implementation details follow. 
inline Exception::Exception(const Exception& that) noexcept { *this = that; } inline int Exception::Traverse(visitproc visit, void* arg) { Py_VISIT(type_.get()); Py_VISIT(value_.get()); Py_VISIT(traceback_.get()); return 0; } } // namespace riegeli::python #endif // PYTHON_RIEGELI_BASE_UTILS_H_ ================================================ FILE: python/riegeli/bytes/BUILD ================================================ load("@rules_cc//cc:defs.bzl", "cc_library") package( default_visibility = ["//python/riegeli:__subpackages__"], features = ["header_modules"], ) licenses(["notice"]) cc_library( name = "python_reader", srcs = ["python_reader.cc"], hdrs = ["python_reader.h"], # python_reader.cc has #define before #include to influence what the # included files provide. features = ["-use_header_modules"], deps = [ "//python/riegeli/base:utils", "//riegeli/base:arithmetic", "//riegeli/base:assert", "//riegeli/base:global", "//riegeli/base:object", "//riegeli/base:types", "//riegeli/bytes:buffer_options", "//riegeli/bytes:buffered_reader", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span", "@rules_python//python/cc:current_py_cc_headers", ], ) cc_library( name = "python_writer", srcs = ["python_writer.cc"], hdrs = ["python_writer.h"], # python_writer.cc has #define before #include to influence what the # included files provide. 
features = ["-use_header_modules"], deps = [ "//python/riegeli/base:utils", "//riegeli/base:arithmetic", "//riegeli/base:assert", "//riegeli/base:global", "//riegeli/base:object", "//riegeli/base:types", "//riegeli/bytes:buffer_options", "//riegeli/bytes:buffered_writer", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", "@rules_python//python/cc:current_py_cc_headers", ], ) ================================================ FILE: python/riegeli/bytes/python_reader.cc ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #define PY_SSIZE_T_CLEAN #include // clang-format: do not reorder the above include. #include "python/riegeli/bytes/python_reader.h" // clang-format: do not reorder the above include. 
#include #include #include #include #include "absl/base/optimization.h" #include "absl/numeric/bits.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "python/riegeli/base/utils.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/global.h" #include "riegeli/base/types.h" #include "riegeli/bytes/buffered_reader.h" namespace riegeli::python { PythonReader::PythonReader(PyObject* src, Options options) : BufferedReader(options.buffer_options()), owns_src_(options.owns_src()) { PythonLock::AssertHeld(); Py_INCREF(src); src_.reset(src); if (options.assumed_pos() != std::nullopt) { set_limit_pos(*options.assumed_pos()); // `supports_random_access_` is left as `false`. random_access_status_ = Global([] { return absl::UnimplementedError( "PythonReader::Options::assumed_pos() excludes random access"); }); } else { static constexpr Identifier id_seekable("seekable"); const PythonPtr seekable_result( PyObject_CallMethodObjArgs(src_.get(), id_seekable.get(), nullptr)); if (ABSL_PREDICT_FALSE(seekable_result == nullptr)) { FailOperation("seekable()"); return; } const int seekable_is_true = PyObject_IsTrue(seekable_result.get()); if (ABSL_PREDICT_FALSE(seekable_is_true < 0)) { FailOperation("PyObject_IsTrue() after seekable()"); return; } if (seekable_is_true == 0) { // Random access is not supported. Assume 0 as the initial position. // `supports_random_access_` is left as `false`. 
random_access_status_ = Global([] { return absl::UnimplementedError( "seekable() is False which excludes random access"); }); return; } static constexpr Identifier id_tell("tell"); const PythonPtr tell_result( PyObject_CallMethodObjArgs(src_.get(), id_tell.get(), nullptr)); if (ABSL_PREDICT_FALSE(tell_result == nullptr)) { FailOperation("tell()"); return; } const std::optional file_pos = PositionFromPython(tell_result.get()); if (ABSL_PREDICT_FALSE(file_pos == std::nullopt)) { FailOperation("PositionFromPython() after tell()"); return; } set_limit_pos(*file_pos); supports_random_access_ = true; } BeginRun(); } void PythonReader::Done() { BufferedReader::Done(); random_access_status_ = absl::OkStatus(); if (owns_src_ && src_ != nullptr) { PythonLock lock; static constexpr Identifier id_close("close"); const PythonPtr close_result( PyObject_CallMethodObjArgs(src_.get(), id_close.get(), nullptr)); if (ABSL_PREDICT_FALSE(close_result == nullptr)) FailOperation("close()"); } } inline bool PythonReader::FailOperation(absl::string_view operation) { RIEGELI_ASSERT(is_open()) << "Failed precondition of PythonReader::FailOperation(): " "Object closed"; PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(!ok())) { // Ignore this error because `PythonReader` already failed. PyErr_Clear(); return false; } exception_ = Exception::Fetch(); return Fail(absl::UnknownError( absl::StrCat(operation, " failed: ", exception_.message()))); } bool PythonReader::ReadInternal(size_t min_length, size_t max_length, char* dest) { RIEGELI_ASSERT_GT(min_length, 0u) << "Failed precondition of BufferedReader::ReadInternal(): " "nothing to read"; RIEGELI_ASSERT_GE(max_length, min_length) << "Failed precondition of BufferedReader::ReadInternal(): " "max_length < min_length"; RIEGELI_ASSERT_OK(*this) << "Failed precondition of BufferedReader::ReadInternal()"; PythonLock lock; // Find a read function to use, preferring in order: `readinto1()`, // `readinto()`, `read1()`, `read()`. 
if (ABSL_PREDICT_FALSE(read_function_ == nullptr)) { static constexpr Identifier id_readinto1("readinto1"); read_function_.reset(PyObject_GetAttr(src_.get(), id_readinto1.get())); read_function_name_ = "readinto1()"; if (read_function_ == nullptr) { if (ABSL_PREDICT_FALSE(!PyErr_ExceptionMatches(PyExc_AttributeError))) { return FailOperation(read_function_name_); } PyErr_Clear(); static constexpr Identifier id_readinto("readinto"); read_function_.reset(PyObject_GetAttr(src_.get(), id_readinto.get())); read_function_name_ = "readinto()"; if (read_function_ == nullptr) { if (ABSL_PREDICT_FALSE(!PyErr_ExceptionMatches(PyExc_AttributeError))) { return FailOperation(read_function_name_); } PyErr_Clear(); use_bytes_ = true; static constexpr Identifier id_read1("read1"); read_function_.reset(PyObject_GetAttr(src_.get(), id_read1.get())); read_function_name_ = "read1()"; if (read_function_ == nullptr) { if (ABSL_PREDICT_FALSE( !PyErr_ExceptionMatches(PyExc_AttributeError))) { return FailOperation(read_function_name_); } PyErr_Clear(); static constexpr Identifier id_read("read"); read_function_.reset(PyObject_GetAttr(src_.get(), id_read.get())); read_function_name_ = "read()"; if (ABSL_PREDICT_FALSE(read_function_ == nullptr)) { return FailOperation(read_function_name_); } } } } } for (;;) { if (ABSL_PREDICT_FALSE(limit_pos() == std::numeric_limits::max())) { return FailOverflow(); } const size_t length_to_read = UnsignedMin( max_length, std::numeric_limits::max() - limit_pos(), absl::bit_floor(size_t{std::numeric_limits::max()})); size_t length_read; if (!use_bytes_) { PythonPtr read_result; { // Prefer using `readinto1()` or `readinto()` to avoid copying memory. 
MemoryView memory_view; PyObject* const memory_view_object = memory_view.MutableToPython(absl::MakeSpan(dest, length_to_read)); if (ABSL_PREDICT_FALSE(memory_view_object == nullptr)) { return FailOperation("MemoryView::MutableToPython()"); } read_result.reset(PyObject_CallFunctionObjArgs( read_function_.get(), memory_view_object, nullptr)); if (ABSL_PREDICT_FALSE(read_result == nullptr)) { return FailOperation(read_function_name_); } if (ABSL_PREDICT_FALSE(!memory_view.Release())) { return FailOperation("MemoryView::Release()"); } } const std::optional length_read_opt = SizeFromPython(read_result.get()); if (ABSL_PREDICT_FALSE(length_read_opt == std::nullopt)) { return FailOperation( absl::StrCat("SizeFromPython() after ", read_function_name_)); } length_read = *length_read_opt; if (ABSL_PREDICT_FALSE(length_read == 0)) return false; if (ABSL_PREDICT_FALSE(length_read > max_length)) { return Fail(absl::InternalError( absl::StrCat(read_function_name_, " read more than requested"))); } } else { const PythonPtr length(SizeToPython(length_to_read)); if (ABSL_PREDICT_FALSE(length == nullptr)) { return FailOperation("SizeToPython()"); } const PythonPtr read_result(PyObject_CallFunctionObjArgs( read_function_.get(), length.get(), nullptr)); if (ABSL_PREDICT_FALSE(read_result == nullptr)) { return FailOperation(read_function_name_); } Py_buffer buffer; if (ABSL_PREDICT_FALSE(PyObject_GetBuffer(read_result.get(), &buffer, PyBUF_CONTIG_RO) < 0)) { return FailOperation( absl::StrCat("PyObject_GetBuffer() after ", read_function_name_)); } if (ABSL_PREDICT_FALSE(buffer.len == 0)) { PyBuffer_Release(&buffer); return false; } if (ABSL_PREDICT_FALSE(IntCast(buffer.len) > max_length)) { PyBuffer_Release(&buffer); return Fail(absl::InternalError( absl::StrCat(read_function_name_, " read more than requested"))); } std::memcpy(dest, buffer.buf, IntCast(buffer.len)); length_read = IntCast(buffer.len); PyBuffer_Release(&buffer); } move_limit_pos(length_read); if (length_read >= 
min_length) return true; dest += length_read; min_length -= length_read; max_length -= length_read; } } bool PythonReader::SeekBehindBuffer(Position new_pos) { RIEGELI_ASSERT(new_pos < start_pos() || new_pos > limit_pos()) << "Failed precondition of BufferedReader::SeekBehindBuffer(): " "position in the buffer, use Seek() instead"; RIEGELI_ASSERT_EQ(start_to_limit(), 0u) << "Failed precondition of BufferedReader::SeekBehindBuffer(): " "buffer not empty"; if (ABSL_PREDICT_FALSE(!PythonReader::SupportsRandomAccess())) { if (ABSL_PREDICT_FALSE(new_pos < start_pos())) { if (ok()) Fail(random_access_status_); return false; } return BufferedReader::SeekBehindBuffer(new_pos); } if (ABSL_PREDICT_FALSE(!ok())) return false; PythonLock lock; if (new_pos > limit_pos()) { // Seeking forwards. const std::optional size = SizeInternal(); if (ABSL_PREDICT_FALSE(size == std::nullopt)) return false; if (ABSL_PREDICT_FALSE(new_pos > *size)) { // File ends. set_limit_pos(*size); return false; } } set_limit_pos(new_pos); const PythonPtr file_pos = PositionToPython(limit_pos()); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { return FailOperation("PositionToPython()"); } static constexpr Identifier id_seek("seek"); const PythonPtr seek_result(PyObject_CallMethodObjArgs( src_.get(), id_seek.get(), file_pos.get(), nullptr)); if (ABSL_PREDICT_FALSE(seek_result == nullptr)) { return FailOperation("seek()"); } return true; } inline std::optional PythonReader::SizeInternal() { RIEGELI_ASSERT_OK(*this) << "Failed precondition of PythonReader::SizeInternal()"; RIEGELI_ASSERT(PythonReader::SupportsRandomAccess()) << "Failed precondition of PythonReader::SizeInternal(): " "random access not supported"; PythonLock::AssertHeld(); absl::string_view operation; const PythonPtr file_pos = PositionToPython(0); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { FailOperation("PositionToPython()"); return std::nullopt; } const PythonPtr whence = IntToPython(2); // `io.SEEK_END` if (ABSL_PREDICT_FALSE(whence == 
nullptr)) { FailOperation("IntToPython()"); return std::nullopt; } static constexpr Identifier id_seek("seek"); PythonPtr result(PyObject_CallMethodObjArgs( src_.get(), id_seek.get(), file_pos.get(), whence.get(), nullptr)); if (result.get() == Py_None) { // Python2 `file.seek()` returns `None`, so `tell()` is needed to get the // new position. Python2 is dead, but some classes still behave like that. static constexpr Identifier id_tell("tell"); result.reset( PyObject_CallMethodObjArgs(src_.get(), id_tell.get(), nullptr)); operation = "tell()"; } else { // `io.IOBase.seek()` returns the new position. operation = "seek()"; } if (ABSL_PREDICT_FALSE(result == nullptr)) { FailOperation(operation); return std::nullopt; } const std::optional size = PositionFromPython(result.get()); if (ABSL_PREDICT_FALSE(size == std::nullopt)) { FailOperation(absl::StrCat("PositionFromPython() after ", operation)); return std::nullopt; } return *size; } std::optional PythonReader::SizeImpl() { if (ABSL_PREDICT_FALSE(!PythonReader::SupportsRandomAccess())) { if (ok()) Fail(random_access_status_); return std::nullopt; } if (ABSL_PREDICT_FALSE(!ok())) return std::nullopt; PythonLock lock; const std::optional size = SizeInternal(); if (ABSL_PREDICT_FALSE(size == std::nullopt)) return std::nullopt; const PythonPtr file_pos = PositionToPython(limit_pos()); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { FailOperation("PositionToPython()"); return std::nullopt; } static constexpr Identifier id_seek("seek"); const PythonPtr seek_result(PyObject_CallMethodObjArgs( src_.get(), id_seek.get(), file_pos.get(), nullptr)); if (ABSL_PREDICT_FALSE(seek_result == nullptr)) { FailOperation("seek()"); return std::nullopt; } return *size; } } // namespace riegeli::python ================================================ FILE: python/riegeli/bytes/python_reader.h ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // 
you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef PYTHON_RIEGELI_BYTES_PYTHON_READER_H_ #define PYTHON_RIEGELI_BYTES_PYTHON_READER_H_ // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #include // clang-format: do not reorder the above include. #include #include #include #include "absl/base/attributes.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "python/riegeli/base/utils.h" #include "riegeli/base/object.h" #include "riegeli/base/types.h" #include "riegeli/bytes/buffer_options.h" #include "riegeli/bytes/buffered_reader.h" namespace riegeli::python { // A `Reader` which reads from a Python binary I/O stream. // // The stream must support: // * `close()` - for `Close()` if `Options::owns_src()` // * `readinto1(memoryview)` or // `readinto(memoryview)` or // `read1(int)` or // `read(int)` // * `seekable()` // * `seek(int[, int])` - for `Seek()` or `Size()` // * `tell()` - for `Seek()` or `Size()` // // `PythonReader` supports random access if // `Options::assumed_pos() == std::nullopt` and the stream supports random // access (this is checked by calling `seekable()`). // // Warning: if random access is not supported and the stream is not owned, // it will have an unpredictable amount of extra data consumed because of // buffering. 
class PythonReader : public BufferedReader { public: class Options : public BufferOptionsBase { public: Options() noexcept {} // If `true`, `PythonReader::Close()` closes the stream. // // Default: `false`. Options& set_owns_src(bool owns_src) & ABSL_ATTRIBUTE_LIFETIME_BOUND { owns_src_ = owns_src; return *this; } Options&& set_owns_src(bool owns_src) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(set_owns_src(owns_src)); } bool owns_src() const { return owns_src_; } // If `std::nullopt`, the current position reported by `pos()` corresponds // to the current stream position if possible, otherwise 0 is assumed as the // initial position. Random access is supported if the stream supports // random access. // // If not `std::nullopt`, this position is assumed initially, to be reported // by `pos()`. It does not need to correspond to the current stream // position. Random access is not supported. // // Default: `std::nullopt`. Options& set_assumed_pos(std::optional assumed_pos) & ABSL_ATTRIBUTE_LIFETIME_BOUND { assumed_pos_ = assumed_pos; return *this; } Options&& set_assumed_pos(std::optional assumed_pos) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(set_assumed_pos(assumed_pos)); } std::optional assumed_pos() const { return assumed_pos_; } private: bool owns_src_ = false; std::optional assumed_pos_; }; // Creates a closed `PythonReader`. explicit PythonReader(Closed) noexcept : BufferedReader(kClosed) {} // Will read from `src`. explicit PythonReader(PyObject* src, Options options = Options()); PythonReader(PythonReader&& that) noexcept; PythonReader& operator=(PythonReader&& that) noexcept; // Returns a borrowed reference to the stream being read from. 
PyObject* src() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return src_.get(); } const Exception& exception() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return exception_; } bool ToleratesReadingAhead() override { return BufferedReader::ToleratesReadingAhead() || PythonReader::SupportsRandomAccess(); } bool SupportsRandomAccess() override { return supports_random_access_; } // For implementing `tp_traverse` of objects containing `PythonReader`. int Traverse(visitproc visit, void* arg); protected: void Done() override; bool ReadInternal(size_t min_length, size_t max_length, char* dest) override; bool SeekBehindBuffer(Position new_pos) override; std::optional SizeImpl() override; private: ABSL_ATTRIBUTE_COLD bool FailOperation(absl::string_view operation); std::optional SizeInternal(); PythonPtrLocking src_; bool owns_src_ = false; bool supports_random_access_ = false; absl::Status random_access_status_; Exception exception_; PythonPtrLocking read_function_; absl::string_view read_function_name_; bool use_bytes_ = false; }; inline PythonReader::PythonReader(PythonReader&& that) noexcept : BufferedReader(static_cast(that)), src_(std::move(that.src_)), owns_src_(that.owns_src_), supports_random_access_( std::exchange(that.supports_random_access_, false)), random_access_status_(std::move(that.random_access_status_)), exception_(std::move(that.exception_)), read_function_(std::move(that.read_function_)), read_function_name_(that.read_function_name_), use_bytes_(that.use_bytes_) {} inline PythonReader& PythonReader::operator=(PythonReader&& that) noexcept { BufferedReader::operator=(static_cast(that)); src_ = std::move(that.src_); owns_src_ = that.owns_src_; supports_random_access_ = std::exchange(that.supports_random_access_, false); random_access_status_ = std::move(that.random_access_status_); exception_ = std::move(that.exception_); read_function_ = std::move(that.read_function_); read_function_name_ = that.read_function_name_; use_bytes_ = that.use_bytes_; return *this; } 
inline int PythonReader::Traverse(visitproc visit, void* arg) { Py_VISIT(src_.get()); Py_VISIT(read_function_.get()); return exception_.Traverse(visit, arg); } } // namespace riegeli::python #endif // PYTHON_RIEGELI_BYTES_PYTHON_READER_H_ ================================================ FILE: python/riegeli/bytes/python_writer.cc ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #define PY_SSIZE_T_CLEAN #include // clang-format: do not reorder the above include. #include "python/riegeli/bytes/python_writer.h" // clang-format: do not reorder the above include. 
#include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/numeric/bits.h" #include "absl/status/status.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "python/riegeli/base/utils.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/global.h" #include "riegeli/base/types.h" #include "riegeli/bytes/buffered_writer.h" namespace riegeli::python { PythonWriter::PythonWriter(PyObject* dest, Options options) : BufferedWriter(options.buffer_options()), owns_dest_(options.owns_dest()) { PythonLock::AssertHeld(); Py_INCREF(dest); dest_.reset(dest); if (options.assumed_pos() != std::nullopt) { set_start_pos(*options.assumed_pos()); // `supports_random_access_` is left as `false`. random_access_status_ = Global([] { return absl::UnimplementedError( "PythonWriter::Options::assumed_pos() excludes random access"); }); } else { static constexpr Identifier id_seekable("seekable"); const PythonPtr seekable_result( PyObject_CallMethodObjArgs(dest_.get(), id_seekable.get(), nullptr)); if (ABSL_PREDICT_FALSE(seekable_result == nullptr)) { FailOperation("seekable()"); return; } const int seekable_is_true = PyObject_IsTrue(seekable_result.get()); if (ABSL_PREDICT_FALSE(seekable_is_true < 0)) { FailOperation("PyObject_IsTrue() after seekable()"); return; } if (seekable_is_true == 0) { // Random access is not supported. Assume 0 as the initial position. // `supports_random_access_` is left as `false`. 
random_access_status_ = Global([] { return absl::UnimplementedError( "seekable() is False which excludes random access"); }); return; } static constexpr Identifier id_tell("tell"); const PythonPtr tell_result( PyObject_CallMethodObjArgs(dest_.get(), id_tell.get(), nullptr)); if (ABSL_PREDICT_FALSE(tell_result == nullptr)) { FailOperation("tell()"); return; } const std::optional file_pos = PositionFromPython(tell_result.get()); if (ABSL_PREDICT_FALSE(file_pos == std::nullopt)) { FailOperation("PositionFromPython() after tell()"); return; } set_start_pos(*file_pos); supports_random_access_ = true; } BeginRun(); } void PythonWriter::Done() { BufferedWriter::Done(); random_access_status_ = absl::OkStatus(); if (owns_dest_ && dest_ != nullptr) { PythonLock lock; static constexpr Identifier id_close("close"); const PythonPtr close_result( PyObject_CallMethodObjArgs(dest_.get(), id_close.get(), nullptr)); if (ABSL_PREDICT_FALSE(close_result == nullptr)) FailOperation("close()"); } } inline bool PythonWriter::FailOperation(absl::string_view operation) { RIEGELI_ASSERT(is_open()) << "Failed precondition of PythonWriter::FailOperation(): " "Object closed"; PythonLock::AssertHeld(); if (ABSL_PREDICT_FALSE(!ok())) { // Ignore this error because `PythonWriter` already failed. 
PyErr_Clear(); return false; } exception_ = Exception::Fetch(); return Fail(absl::UnknownError( absl::StrCat(operation, " failed: ", exception_.message()))); } bool PythonWriter::WriteInternal(absl::string_view src) { RIEGELI_ASSERT(!src.empty()) << "Failed precondition of BufferedWriter::WriteInternal(): " "nothing to write"; RIEGELI_ASSERT_OK(*this) << "Failed precondition of BufferedWriter::WriteInternal()"; if (ABSL_PREDICT_FALSE(src.size() > std::numeric_limits::max() - start_pos())) { return FailOverflow(); } PythonLock lock; if (ABSL_PREDICT_FALSE(write_function_ == nullptr)) { static constexpr Identifier id_write("write"); write_function_.reset(PyObject_GetAttr(dest_.get(), id_write.get())); if (ABSL_PREDICT_FALSE(write_function_ == nullptr)) { return FailOperation("write()"); } } do { const size_t length_to_write = UnsignedMin( src.size(), absl::bit_floor(size_t{std::numeric_limits::max()})); size_t length_written; { PythonPtr write_result; if (!use_bytes_) { // Prefer passing a `memoryview` to avoid copying memory. MemoryView memory_view; PyObject* const memory_view_object = memory_view.ToPython( absl::string_view(src.data(), length_to_write)); if (ABSL_PREDICT_FALSE(memory_view_object == nullptr)) { return FailOperation("MemoryView::ToPython()"); } write_result.reset(PyObject_CallFunctionObjArgs( write_function_.get(), memory_view_object, nullptr)); if (ABSL_PREDICT_FALSE(write_result == nullptr)) { if (!PyErr_ExceptionMatches(PyExc_TypeError)) { return FailOperation("write()"); } PyErr_Clear(); use_bytes_ = true; } if (ABSL_PREDICT_FALSE(!memory_view.Release())) { return FailOperation("MemoryView::Release()"); } } if (use_bytes_) { // `write()` does not support `memoryview`. Use `bytes`. 
const PythonPtr bytes = BytesToPython(src.substr(0, length_to_write)); if (ABSL_PREDICT_FALSE(bytes == nullptr)) { return FailOperation("BytesToPython()"); } write_result.reset(PyObject_CallFunctionObjArgs(write_function_.get(), bytes.get(), nullptr)); if (ABSL_PREDICT_FALSE(write_result == nullptr)) { return FailOperation("write()"); } } if (write_result.get() == Py_None) { // Python2 `file.write()` returns `None`, and would raise an exception // if less than the full length had been written. Python2 is dead, but // some classes still behave like that. length_written = length_to_write; } else { // `io.IOBase.write()` returns the length written. const std::optional length_written_opt = SizeFromPython(write_result.get()); if (ABSL_PREDICT_FALSE(length_written_opt == std::nullopt)) { return FailOperation("SizeFromPython() after write()"); } length_written = *length_written_opt; } } if (ABSL_PREDICT_FALSE(length_written > length_to_write)) { return Fail(absl::InternalError("write() wrote more than requested")); } move_start_pos(length_written); src.remove_prefix(length_written); } while (!src.empty()); return true; } bool PythonWriter::FlushImpl(FlushType flush_type) { if (ABSL_PREDICT_FALSE(!BufferedWriter::FlushImpl(flush_type))) return false; switch (flush_type) { case FlushType::kFromObject: if (!owns_dest_) return true; ABSL_FALLTHROUGH_INTENDED; case FlushType::kFromProcess: case FlushType::kFromMachine: PythonLock lock; static constexpr Identifier id_flush("flush"); const PythonPtr flush_result( PyObject_CallMethodObjArgs(dest_.get(), id_flush.get(), nullptr)); if (ABSL_PREDICT_FALSE(flush_result == nullptr)) { return FailOperation("flush()"); } return true; } RIEGELI_ASSUME_UNREACHABLE() << "Unknown flush type: " << static_cast(flush_type); } bool PythonWriter::SeekBehindBuffer(Position new_pos) { RIEGELI_ASSERT_NE(new_pos, pos()) << "Failed precondition of BufferedWriter::SeekBehindBuffer(): " "position unchanged, use Seek() instead"; 
RIEGELI_ASSERT_EQ(start_to_limit(), 0u) << "Failed precondition of BufferedWriter::SeekBehindBuffer(): " "buffer not empty"; if (ABSL_PREDICT_FALSE(!PythonWriter::SupportsRandomAccess())) { if (ok()) Fail(random_access_status_); return false; } PythonLock lock; if (new_pos > start_pos()) { // Seeking forwards. const std::optional size = SizeInternal(); if (ABSL_PREDICT_FALSE(size == std::nullopt)) return false; if (ABSL_PREDICT_FALSE(new_pos > *size)) { // File ends. set_start_pos(*size); return false; } } set_start_pos(new_pos); const PythonPtr file_pos = PositionToPython(start_pos()); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { return FailOperation("PositionToPython()"); } static constexpr Identifier id_seek("seek"); const PythonPtr seek_result(PyObject_CallMethodObjArgs( dest_.get(), id_seek.get(), file_pos.get(), nullptr)); if (ABSL_PREDICT_FALSE(seek_result == nullptr)) { return FailOperation("seek()"); } return true; } inline std::optional PythonWriter::SizeInternal() { RIEGELI_ASSERT_OK(*this) << "Failed precondition of PythonWriter::SizeInternal()"; RIEGELI_ASSERT(PythonWriter::SupportsRandomAccess()) << "Failed precondition of PythonWriter::SizeInternal(): " "random access not supported"; RIEGELI_ASSERT_EQ(start_to_limit(), 0u) << "Failed precondition of PythonWriter::SizeInternal(): " "buffer not empty"; PythonLock::AssertHeld(); absl::string_view operation; const PythonPtr file_pos = PositionToPython(0); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { FailOperation("PositionToPython()"); return std::nullopt; } const PythonPtr whence = IntToPython(2); // `io.SEEK_END` if (ABSL_PREDICT_FALSE(whence == nullptr)) { FailOperation("IntToPython()"); return std::nullopt; } static constexpr Identifier id_seek("seek"); PythonPtr result(PyObject_CallMethodObjArgs( dest_.get(), id_seek.get(), file_pos.get(), whence.get(), nullptr)); if (result.get() == Py_None) { // Python2 `file.seek()` returns `None`. Python2 is dead, but some classes // still behave like that. 
static constexpr Identifier id_tell("tell"); result.reset( PyObject_CallMethodObjArgs(dest_.get(), id_tell.get(), nullptr)); operation = "tell()"; } else { // `io.IOBase.seek()` returns the new position. operation = "seek()"; } if (ABSL_PREDICT_FALSE(result == nullptr)) { FailOperation(operation); return std::nullopt; } const std::optional size = PositionFromPython(result.get()); if (ABSL_PREDICT_FALSE(size == std::nullopt)) { FailOperation(absl::StrCat("PositionFromPython() after ", operation)); return std::nullopt; } return *size; } std::optional PythonWriter::SizeBehindBuffer() { RIEGELI_ASSERT_EQ(start_to_limit(), 0u) << "Failed precondition of BufferedWriter::SizeBehindBuffer(): " "buffer not empty"; if (ABSL_PREDICT_FALSE(!PythonWriter::SupportsRandomAccess())) { if (ok()) Fail(random_access_status_); return std::nullopt; } if (ABSL_PREDICT_FALSE(!ok())) return std::nullopt; PythonLock lock; const std::optional size = SizeInternal(); if (ABSL_PREDICT_FALSE(size == std::nullopt)) return std::nullopt; const PythonPtr file_pos = PositionToPython(start_pos()); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { FailOperation("PositionToPython()"); return std::nullopt; } static constexpr Identifier id_seek("seek"); const PythonPtr seek_result(PyObject_CallMethodObjArgs( dest_.get(), id_seek.get(), file_pos.get(), nullptr)); if (ABSL_PREDICT_FALSE(seek_result == nullptr)) { FailOperation("seek()"); return std::nullopt; } return *size; } bool PythonWriter::TruncateBehindBuffer(Position new_size) { RIEGELI_ASSERT_EQ(start_to_limit(), 0u) << "Failed precondition of BufferedWriter::TruncateBehindBuffer(): " "buffer not empty"; if (ABSL_PREDICT_FALSE(!PythonWriter::SupportsRandomAccess())) { if (ok()) Fail(random_access_status_); return false; } if (ABSL_PREDICT_FALSE(!ok())) return false; PythonLock lock; const std::optional size = SizeInternal(); if (ABSL_PREDICT_FALSE(size == std::nullopt)) return false; if (ABSL_PREDICT_FALSE(new_size > *size)) { // File ends. 
set_start_pos(*size); return false; } { const PythonPtr file_pos = PositionToPython(new_size); if (ABSL_PREDICT_FALSE(file_pos == nullptr)) { return FailOperation("PositionToPython()"); } static constexpr Identifier id_seek("seek"); const PythonPtr seek_result(PyObject_CallMethodObjArgs( dest_.get(), id_seek.get(), file_pos.get(), nullptr)); if (ABSL_PREDICT_FALSE(seek_result == nullptr)) { return FailOperation("seek()"); } } set_start_pos(new_size); static constexpr Identifier id_truncate("truncate"); const PythonPtr truncate_result( PyObject_CallMethodObjArgs(dest_.get(), id_truncate.get(), nullptr)); if (ABSL_PREDICT_FALSE(truncate_result == nullptr)) { return FailOperation("truncate()"); } return true; } } // namespace riegeli::python ================================================ FILE: python/riegeli/bytes/python_writer.h ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef PYTHON_RIEGELI_BYTES_PYTHON_WRITER_H_ #define PYTHON_RIEGELI_BYTES_PYTHON_WRITER_H_ // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #include // clang-format: do not reorder the above include. 
#include  // NOTE(review): standard header names stripped by extraction —
#include  // restore from upstream (likely <optional>, <utility>, ...).
#include "absl/base/attributes.h"
#include "absl/status/status.h"
#include "absl/strings/string_view.h"
#include "python/riegeli/base/utils.h"
#include "riegeli/base/object.h"
#include "riegeli/base/types.h"
#include "riegeli/bytes/buffer_options.h"
#include "riegeli/bytes/buffered_writer.h"

namespace riegeli::python {

// A `Writer` which writes to a Python binary I/O stream.
//
// The stream must support:
//  * `close()`          - for `Close()` if `Options::owns_dest()`
//  * `write(bytes)`
//  * `flush()`          - for `Flush()`
//  * `seekable()`
//  * `seek(int[, int])` - for `Seek()`, `Size()`, or `Truncate()`
//  * `tell()`           - for `Seek()`, `Size()`, or `Truncate()`
//  * `truncate()`       - for `Truncate()`
//
// `PythonWriter` supports random access if
// `Options::assumed_pos() == std::nullopt` and the stream supports random
// access (this is checked by calling `seekable()`).
class PythonWriter : public BufferedWriter {
 public:
  // NOTE(review): base class template argument appears stripped by extraction
  // (likely `BufferOptionsBase<Options>`) — confirm against upstream.
  class Options : public BufferOptionsBase {
   public:
    Options() noexcept {}

    // If `true`, `PythonWriter::Close()` closes the stream, and
    // `PythonWriter::Flush(flush_type)` flushes the stream even if
    // `flush_type` is `FlushType::kFromObject`.
    //
    // Default: `false`.
    Options& set_owns_dest(bool owns_dest) & ABSL_ATTRIBUTE_LIFETIME_BOUND {
      owns_dest_ = owns_dest;
      return *this;
    }
    Options&& set_owns_dest(bool owns_dest) && ABSL_ATTRIBUTE_LIFETIME_BOUND {
      return std::move(set_owns_dest(owns_dest));
    }
    bool owns_dest() const { return owns_dest_; }

    // If `std::nullopt`, the current position reported by `pos()` corresponds
    // to the current stream position if possible, otherwise 0 is assumed as
    // the initial position. Random access is supported if the stream supports
    // random access.
    //
    // If not `std::nullopt`, this position is assumed initially, to be
    // reported by `pos()`. It does not need to correspond to the current
    // stream position. Random access is not supported.
    //
    // Default: `std::nullopt`.
    Options& set_assumed_pos(std::optional assumed_pos) &
        ABSL_ATTRIBUTE_LIFETIME_BOUND {
      assumed_pos_ = assumed_pos;
      return *this;
    }
    Options&& set_assumed_pos(std::optional assumed_pos) &&
        ABSL_ATTRIBUTE_LIFETIME_BOUND {
      return std::move(set_assumed_pos(assumed_pos));
    }
    std::optional assumed_pos() const { return assumed_pos_; }

   private:
    bool owns_dest_ = false;
    std::optional assumed_pos_;
  };

  // Creates a closed `PythonWriter`.
  explicit PythonWriter(Closed) noexcept : BufferedWriter(kClosed) {}

  // Will write to `dest`.
  explicit PythonWriter(PyObject* dest, Options options = Options());

  PythonWriter(PythonWriter&& that) noexcept;
  PythonWriter& operator=(PythonWriter&& that) noexcept;

  // Returns a borrowed reference to the stream being written to.
  PyObject* dest() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return dest_.get(); }

  // Returns the stored exception object (see python/riegeli/base/utils.h).
  const Exception& exception() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return exception_;
  }

  bool SupportsRandomAccess() override { return supports_random_access_; }

  // For implementing `tp_traverse` of objects containing `PythonWriter`.
  int Traverse(visitproc visit, void* arg);

 protected:
  void Done() override;
  bool WriteInternal(absl::string_view src) override;
  bool FlushImpl(FlushType flush_type) override;
  bool SeekBehindBuffer(Position new_pos) override;
  std::optional SizeBehindBuffer() override;
  bool TruncateBehindBuffer(Position new_size) override;

 private:
  ABSL_ATTRIBUTE_COLD bool FailOperation(absl::string_view operation);
  std::optional SizeInternal();

  PythonPtrLocking dest_;
  bool owns_dest_ = false;
  bool supports_random_access_ = false;
  // Why random access is unsupported, reported when a seek/size is attempted.
  absl::Status random_access_status_;
  Exception exception_;
  // Presumably a cached bound method of `dest_` used by `WriteInternal()` —
  // confirm in python_writer.cc.
  PythonPtrLocking write_function_;
  bool use_bytes_ = false;
};

inline PythonWriter::PythonWriter(PythonWriter&& that) noexcept
    : BufferedWriter(static_cast(that)),
      dest_(std::move(that.dest_)),
      owns_dest_(that.owns_dest_),
      supports_random_access_(
          std::exchange(that.supports_random_access_, false)),
      random_access_status_(std::move(that.random_access_status_)),
      exception_(std::move(that.exception_)),
      write_function_(std::move(that.write_function_)),
      use_bytes_(that.use_bytes_) {}

inline PythonWriter& PythonWriter::operator=(PythonWriter&& that) noexcept {
  BufferedWriter::operator=(static_cast(that));
  dest_ = std::move(that.dest_);
  owns_dest_ = that.owns_dest_;
  supports_random_access_ = std::exchange(that.supports_random_access_, false);
  random_access_status_ = std::move(that.random_access_status_);
  exception_ = std::move(that.exception_);
  write_function_ = std::move(that.write_function_);
  use_bytes_ = that.use_bytes_;
  return *this;
}

inline int PythonWriter::Traverse(visitproc visit, void* arg) {
  Py_VISIT(dest_.get());
  Py_VISIT(write_function_.get());
  return exception_.Traverse(visit, arg);
}

}  // namespace riegeli::python

#endif  // PYTHON_RIEGELI_BYTES_PYTHON_WRITER_H_

================================================
FILE: python/riegeli/py_extension.bzl
================================================

"""Supports writing Python modules in C++."""

load("@rules_cc//cc:defs.bzl", "cc_binary",
"cc_library") load("@rules_python//python:defs.bzl", "py_library") def py_extension( name = None, srcs = None, hdrs = None, data = None, features = None, visibility = None, deps = None): """Creates a Python module implemented in C++. Python modules can depend on a py_extension. Other py_extensions can depend on a generated C++ library named with "_cc" suffix. Args: name: Name for this target. srcs: C++ source files. hdrs: C++ header files, for other py_extensions which depend on this. data: Files needed at runtime. This may include Python libraries. features: Passed to cc_library. visibility: Controls which rules can depend on this. deps: Other C++ libraries that this library depends upon. """ cc_library_name = name + "_cc" cc_binary_name = name + ".so" cc_library( name = cc_library_name, srcs = srcs, hdrs = hdrs, data = data, features = features, visibility = visibility, deps = deps, alwayslink = True, ) cc_binary( name = cc_binary_name, linkshared = True, linkstatic = True, visibility = ["//visibility:private"], deps = [cc_library_name], ) py_library( name = name, data = [cc_binary_name], visibility = visibility, ) ================================================ FILE: python/riegeli/python_configure.bzl ================================================ """Repository rule for Python autoconfiguration. `python_configure` depends on the following environment variables: * `PYTHON_BIN_PATH`: location of python binary. * `PYTHON_LIB_PATH`: Location of python libraries. 
""" _BAZEL_SH = "BAZEL_SH" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" _PYTHON_LIB_PATH = "PYTHON_LIB_PATH" _TF_PYTHON_CONFIG_REPO = "TF_PYTHON_CONFIG_REPO" def _tpl(repository_ctx, tpl, substitutions = {}, out = None): if not out: out = tpl repository_ctx.template( out, Label("//python/riegeli:{}.tpl".format(tpl)), substitutions, ) def _fail(msg): """Outputs failure message when auto configuration fails.""" red = "\033[0;31m" no_color = "\033[0m" fail("{}Python Configuration Error:{} {}\n".format(red, no_color, msg)) def _is_windows(repository_ctx): """Returns true if the host operating system is Windows.""" os_name = repository_ctx.os.name.lower() return "windows" in os_name def _execute( repository_ctx, cmdline, error_msg = None, error_details = None, empty_stdout_fine = False): """Executes an arbitrary shell command. Args: repository_ctx: the repository_ctx object cmdline: list of strings, the command to execute error_msg: string, a summary of the error if the command fails error_details: string, details about the error or steps to fix it empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise it's an error Return: the result of repository_ctx.execute(cmdline) """ result = repository_ctx.execute(cmdline) if result.stderr or not (empty_stdout_fine or result.stdout): _fail("\n".join([ error_msg.strip() if error_msg else "Repository command failed", result.stderr.strip(), error_details if error_details else "", ])) return result def _read_dir(repository_ctx, src_dir): """Returns a string with all files in a directory. Finds all files inside a directory, traversing subfolders and following symlinks. The returned string contains the full path of all files separated by line breaks. 
""" if _is_windows(repository_ctx): src_dir = src_dir.replace("/", "\\") find_result = _execute( repository_ctx, ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"], empty_stdout_fine = True, ) # src_files will be used in genrule.outs where the paths must # use forward slashes. result = find_result.stdout.replace("\\", "/") else: find_result = _execute( repository_ctx, ["find", src_dir, "-follow", "-type", "f"], empty_stdout_fine = True, ) result = find_result.stdout return result def _genrule(src_dir, genrule_name, command, outs): """Returns a string with a genrule. Genrule executes the given command and produces the given outputs. """ return ( "genrule(\n" + ' name = "{}",\n' + " outs = [\n" + "{}\n" + " ],\n" + ' cmd = """\n' + "{}\n" + ' """,\n' + ")\n" ).format(genrule_name, outs, command) def _norm_path(path): """Returns a path with '/' and removes the trailing slash.""" return path.replace("\\", "/").rstrip("/") def _symlink_genrule_for_dir( repository_ctx, src_dir, dest_dir, genrule_name, src_files = [], dest_files = []): """Returns a genrule to symlink (or copy if on Windows) a set of files. If src_dir is passed, files will be read from the given directory; otherwise we assume files are in src_files and dest_files """ if src_dir != None: src_dir = _norm_path(src_dir) dest_dir = _norm_path(dest_dir) files = "\n".join( sorted(_read_dir(repository_ctx, src_dir).splitlines()), ) # Create a list with the src_dir stripped to use for outputs. dest_files = files.replace(src_dir, "").splitlines() src_files = files.splitlines() command = [] outs = [] for i in range(len(dest_files)): if dest_files[i] != "": # If we have only one file to link we do not want to use the # dest_dir, as $(@D) will include the full path to the file. dest = "$(@D)/{}{}".format( dest_dir if len(dest_files) != 1 else "", dest_files[i], ) # Copy the headers to create a sandboxable setup. 
cmd = "cp -f" command.append('{} "{}" "{}"'.format(cmd, src_files[i], dest)) outs.append(' "{}{}",'.format(dest_dir, dest_files[i])) genrule = _genrule( src_dir, genrule_name, " && ".join(command), "\n".join(outs), ) return genrule def _get_python_bin(repository_ctx): """Gets the python bin path.""" python_bin = repository_ctx.os.environ.get(_PYTHON_BIN_PATH) if python_bin != None: return python_bin python_bin_path = repository_ctx.which("python") if python_bin_path != None: return str(python_bin_path) _fail(("Cannot find python in PATH, please make sure " + "python is installed and add its directory in PATH, " + "or --define {}='/something/else'.\nPATH={}").format( _PYTHON_BIN_PATH, repository_ctx.os.environ.get("PATH", ""), )) def _get_bash_bin(repository_ctx): """Gets the bash bin path.""" bash_bin = repository_ctx.os.environ.get(_BAZEL_SH) if bash_bin != None: return bash_bin bash_bin_path = repository_ctx.which("bash") if bash_bin_path != None: return str(bash_bin_path) _fail(("Cannot find bash in PATH, please make sure " + "bash is installed and add its directory in PATH, " + "or --define {}='/path/to/bash'.\nPATH={}").format( _BAZEL_SH, repository_ctx.os.environ.get("PATH", ""), )) def _get_python_runtime_pair(repository_ctx, python_bin): """Builds a py_runtime_pair definition.""" return ( "py_runtime_pair(\n" + ' name = "py_runtime_pair",\n' + " py2_runtime = None,\n" + " py3_runtime = \":py3_runtime\",\n" + ")\n" + "\n" + "py_runtime(\n" + ' name = "py3_runtime",\n' + ' interpreter_path = "{}",\n' + ' python_version = "PY3",\n' + ")\n" ).format(python_bin) def _get_python_lib(repository_ctx, python_bin): """Gets the python lib path.""" python_lib = repository_ctx.os.environ.get(_PYTHON_LIB_PATH) if python_lib != None: return python_lib print_lib = ("<= 1:\n" + " print(paths[0])\n" + "END") cmd = "{} - {}".format(python_bin, print_lib) result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) return result.stdout.strip("\n") def 
_check_python_lib(repository_ctx, python_lib): """Checks the python lib path.""" cmd = 'test -d "{}" -a -x "{}"'.format(python_lib, python_lib) result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) if result.return_code == 1: _fail("Invalid python library path: {}".format(python_lib)) def _check_python_bin(repository_ctx, python_bin): """Checks the python bin path.""" cmd = '[[ -x "{}" ]] && [[ ! -d "{}" ]]'.format(python_bin, python_bin) result = repository_ctx.execute([_get_bash_bin(repository_ctx), "-c", cmd]) if result.return_code == 1: _fail(("--define {}='{}' is not executable. " + "Is it the python binary?").format( _PYTHON_BIN_PATH, python_bin, )) def _get_python_include(repository_ctx, python_bin): """Gets the python include path.""" result = _execute( repository_ctx, [ python_bin, "-c", "import importlib; " + "import importlib.util; " + "print(importlib.import_module('distutils.sysconfig').get_python_inc() " + "if importlib.util.find_spec('distutils.sysconfig') " + "else importlib.import_module('sysconfig').get_path('include'))", ], error_msg = "Problem getting python include path.", error_details = ("Is the Python binary path set up right? " + "(See ./configure or {}.) " + "Is distutils installed?").format(_PYTHON_BIN_PATH), ) return result.stdout.splitlines()[0] def _get_python_import_lib_name(repository_ctx, python_bin): """Gets Python import library name (pythonXY.lib) on Windows.""" result = _execute( repository_ctx, [ python_bin, "-c", "import sys; " + 'print("python{}{}.lib".format(' + "sys.version_info.major, sys.version_info.minor))", ], error_msg = "Problem getting python import library.", error_details = ("Is the Python binary path set up right? " + "(See ./configure or {}.) 
").format(_PYTHON_BIN_PATH), ) return result.stdout.splitlines()[0] def _get_numpy_include(repository_ctx, python_bin): """Gets the numpy include path.""" return _execute( repository_ctx, [ python_bin, "-c", "import numpy; print(numpy.get_include())", ], error_msg = "Problem getting numpy include path.", error_details = "Is numpy installed?", ).stdout.splitlines()[0] def _create_local_python_repository(repository_ctx): """Creates the repository containing files set up to build with Python.""" python_bin = _get_python_bin(repository_ctx) _check_python_bin(repository_ctx, python_bin) python_runtime_pair = _get_python_runtime_pair(repository_ctx, python_bin) python_lib = _get_python_lib(repository_ctx, python_bin) _check_python_lib(repository_ctx, python_lib) python_include = _get_python_include(repository_ctx, python_bin) numpy_include = _get_numpy_include(repository_ctx, python_bin) + "/numpy" python_include_rule = _symlink_genrule_for_dir( repository_ctx, python_include, "python_include", "python_include", ) python_import_lib_genrule = "" # To build Python C/C++ extension on Windows, we need to link to python # import library pythonXY.lib # See https://docs.python.org/3/extending/windows.html if _is_windows(repository_ctx): python_include = _norm_path(python_include) python_import_lib_name = _get_python_import_lib_name( repository_ctx, python_bin, ) python_import_lib_src = "{}/libs/{}".format( python_include.rsplit("/", 1)[0], python_import_lib_name, ) python_import_lib_genrule = _symlink_genrule_for_dir( repository_ctx, None, "", "python_import_lib", [python_import_lib_src], [python_import_lib_name], ) numpy_include_rule = _symlink_genrule_for_dir( repository_ctx, numpy_include, "numpy_include/numpy", "numpy_include", ) _tpl(repository_ctx, "BUILD", { "%{PYTHON_RUNTIME_PAIR}": python_runtime_pair, "%{PYTHON_INCLUDE_GENRULE}": python_include_rule, "%{PYTHON_IMPORT_LIB_GENRULE}": python_import_lib_genrule, "%{NUMPY_INCLUDE_GENRULE}": numpy_include_rule, }) def 
_create_remote_python_repository(repository_ctx, remote_config_repo): """Creates pointers to a remotely configured repo set up to build with Python. """ repository_ctx.template("BUILD", Label(remote_config_repo + ":BUILD"), {}) def _python_autoconf_impl(repository_ctx): """Implementation of the python_autoconf repository rule.""" if _TF_PYTHON_CONFIG_REPO in repository_ctx.os.environ: _create_remote_python_repository( repository_ctx, repository_ctx.os.environ[_TF_PYTHON_CONFIG_REPO], ) else: _create_local_python_repository(repository_ctx) python_configure = repository_rule( implementation = _python_autoconf_impl, environ = [ _BAZEL_SH, _PYTHON_BIN_PATH, _PYTHON_LIB_PATH, _TF_PYTHON_CONFIG_REPO, ], ) """Detects and configures the local Python. Add the following to your WORKSPACE FILE: ```python python_configure(name = "local_config_python") ``` Args: name: A unique name for this workspace rule. """ ================================================ FILE: python/riegeli/records/BUILD ================================================ load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library") load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library") load("@rules_python//python:defs.bzl", "py_library") load("//python/riegeli:py_extension.bzl", "py_extension") package( default_visibility = ["//python/riegeli:__subpackages__"], features = ["header_modules"], ) licenses(["notice"]) py_extension( name = "record_reader", srcs = ["record_reader.cc"], # Python modules imported from C++. data = [ ":records_metadata_py_pb2", ":skipped_region", "@com_google_protobuf//:protobuf_python", ], # record_reader.cc has #define before #include to influence what the # included files provide. 
features = ["-use_header_modules"], deps = [ ":record_position_cc", "//python/riegeli/base:utils", "//python/riegeli/bytes:python_reader", "//riegeli/base:arithmetic", "//riegeli/base:assert", "//riegeli/base:chain", "//riegeli/base:compare", "//riegeli/base:types", "//riegeli/chunk_encoding:field_projection", "//riegeli/records:record_position", "//riegeli/records:record_reader", "//riegeli/records:skipped_region", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/strings:string_view", "@rules_python//python/cc:current_py_cc_headers", ], ) py_extension( name = "record_writer", srcs = ["record_writer.cc"], # record_writer.cc has #define before #include to influence what the # included files provide. features = ["-use_header_modules"], deps = [ ":record_position_cc", "//python/riegeli/base:utils", "//python/riegeli/bytes:python_writer", "//riegeli/base:assert", "//riegeli/base:chain", "//riegeli/base:types", "//riegeli/records:record_writer", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/strings:string_view", "@rules_python//python/cc:current_py_cc_headers", ], ) py_extension( name = "record_position", srcs = ["record_position.cc"], hdrs = ["record_position.h"], # record_position.cc has #define before #include to influence what the # included files provide. 
features = ["-use_header_modules"], deps = [ "//python/riegeli/base:utils", "//riegeli/base:arithmetic", "//riegeli/base:types", "//riegeli/records:record_position", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/hash", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view", "@rules_python//python/cc:current_py_cc_headers", ], ) py_library( name = "skipped_region", srcs = ["skipped_region.py"], ) proto_library( name = "records_metadata_proto", srcs = ["records_metadata.proto"], deps = ["@com_google_protobuf//:descriptor_proto"], ) py_proto_library( name = "records_metadata_py_pb2", deps = [":records_metadata_proto"], ) ================================================ FILE: python/riegeli/records/__init__.py ================================================ ================================================ FILE: python/riegeli/records/examples/BUILD ================================================ load("@rules_python//python:defs.bzl", "py_binary") package(features = ["header_modules"]) licenses(["notice"]) py_binary( name = "write_read_records", srcs = ["write_read_records.py"], deps = [ "//python/riegeli", "//python/riegeli/records/tests:records_test_py_pb2", ], ) ================================================ FILE: python/riegeli/records/examples/__init__.py ================================================ ================================================ FILE: python/riegeli/records/examples/write_read_records.py ================================================ # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. """Simple example which writes and reads a Riegeli/records file.""" import io import riegeli from riegeli.records.tests import records_test_pb2 def sample_string(i, size): piece = f'{i} '.encode() result = piece * -(-size // len(piece)) # len(result) >= size return result[:size] def sample_message(i, size): return records_test_pb2.SimpleMessage(id=i, payload=sample_string(i, size)) def write_records(filename): print('Writing', filename) metadata = riegeli.RecordsMetadata() riegeli.set_record_type(metadata, records_test_pb2.SimpleMessage) with riegeli.RecordWriter( io.FileIO(filename, mode='wb'), options='transpose', metadata=metadata ) as writer: writer.write_messages(sample_message(i, 100) for i in range(100)) def read_records(filename): print('Reading', filename) with riegeli.RecordReader( io.FileIO(filename, mode='rb'), field_projection=[[ records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name['id'].number ]], ) as reader: print( ' '.join( str(record.id) for record in reader.read_messages(records_test_pb2.SimpleMessage) ) ) def main(): filename = '/tmp/riegeli_example' write_records(filename) read_records(filename) if __name__ == '__main__': main() ================================================ FILE: python/riegeli/records/record_position.cc ================================================ // Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #define PY_SSIZE_T_CLEAN #include // clang-format: do not reorder the above include. #include "python/riegeli/records/record_position.h" // clang-format: do not reorder the above include. #include #include #include #include #include #include "absl/base/optimization.h" #include "absl/hash/hash.h" #include "absl/strings/str_cat.h" #include "absl/strings/string_view.h" #include "python/riegeli/base/utils.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/types.h" #include "riegeli/records/record_position.h" namespace riegeli::python { namespace { struct PyRecordPositionObject { // clang-format off PyObject_HEAD static_assert(true, ""); // clang-format workaround. // clang-format on PythonWrapped record_position; }; extern PyTypeObject PyRecordPosition_Type; // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. 
extern "C" { static void RecordPositionDestructor(PyRecordPositionObject* self) { PythonUnlocked([&] { self->record_position.reset(); }); Py_TYPE(self)->tp_free(self); } static PyRecordPositionObject* RecordPositionNew(PyTypeObject* cls, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"chunk_begin", "record_index", nullptr}; PyObject* chunk_begin_arg; PyObject* record_index_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "OO:RecordPosition", const_cast(keywords), &chunk_begin_arg, &record_index_arg))) { return nullptr; } const std::optional chunk_begin = PositionFromPython(chunk_begin_arg); if (ABSL_PREDICT_FALSE(chunk_begin == std::nullopt)) return nullptr; const std::optional record_index = PositionFromPython(record_index_arg); if (ABSL_PREDICT_FALSE(record_index == std::nullopt)) return nullptr; if (ABSL_PREDICT_FALSE(*chunk_begin > std::numeric_limits::max()) || ABSL_PREDICT_FALSE(*record_index > std::numeric_limits::max() - *chunk_begin)) { PyErr_Format(PyExc_OverflowError, "RecordPosition overflow: %llu/%llu", static_cast(*chunk_begin), static_cast(*record_index)); return nullptr; } std::unique_ptr self( reinterpret_cast(cls->tp_alloc(cls, 0))); if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr; self->record_position.emplace(RecordPosition( IntCast(*chunk_begin), IntCast(*record_index))); return self.release(); } static PyObject* RecordPositionChunkBegin(PyRecordPositionObject* self, void* closure) { const RecordPosition pos = PythonUnlocked([&] { return self->record_position->get(); }); return PositionToPython(pos.chunk_begin()).release(); } static PyObject* RecordPositionRecordIndex(PyRecordPositionObject* self, void* closure) { const RecordPosition pos = PythonUnlocked([&] { return self->record_position->get(); }); return PositionToPython(pos.record_index()).release(); } static PyObject* RecordPositionNumeric(PyRecordPositionObject* self, void* closure) { const RecordPosition pos = 
PythonUnlocked([&] { return self->record_position->get(); }); return PositionToPython(pos.numeric()).release(); } static PyObject* RecordPositionCompare(PyObject* a, PyObject* b, int op) { if (ABSL_PREDICT_FALSE(!PyObject_TypeCheck(a, &PyRecordPosition_Type)) || ABSL_PREDICT_FALSE(!PyObject_TypeCheck(b, &PyRecordPosition_Type))) { Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } RecordPosition a_pos; RecordPosition b_pos; PythonUnlocked([&] { a_pos = reinterpret_cast(a)->record_position->get(); b_pos = reinterpret_cast(b)->record_position->get(); }); switch (op) { case Py_EQ: return PyBool_FromLong(a_pos == b_pos); case Py_NE: return PyBool_FromLong(a_pos != b_pos); case Py_LT: return PyBool_FromLong(a_pos < b_pos); case Py_GT: return PyBool_FromLong(a_pos > b_pos); case Py_LE: return PyBool_FromLong(a_pos <= b_pos); case Py_GE: return PyBool_FromLong(a_pos >= b_pos); default: Py_INCREF(Py_NotImplemented); return Py_NotImplemented; } } static Py_hash_t RecordPositionHash(PyRecordPositionObject* self) { const RecordPosition pos = PythonUnlocked([&] { return self->record_position->get(); }); Py_hash_t hash = static_cast(absl::Hash()(pos)); if (ABSL_PREDICT_FALSE(hash == -1)) hash = -2; return hash; } static PyObject* RecordPositionStr(PyRecordPositionObject* self) { const RecordPosition pos = PythonUnlocked([&] { return self->record_position->get(); }); return StringToPython(pos.ToString()).release(); } static PyRecordPositionObject* RecordPositionFromStr(PyTypeObject* cls, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"serialized", nullptr}; PyObject* serialized_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:from_str", const_cast(keywords), &serialized_arg))) { return nullptr; } StrOrBytes serialized; if (ABSL_PREDICT_FALSE(!serialized.FromPython(serialized_arg))) { return nullptr; } RecordPosition pos; if (ABSL_PREDICT_FALSE(!pos.FromString(serialized))) { PyErr_SetString(PyExc_ValueError, 
"RecordPosition.from_str() failed"); return nullptr; } std::unique_ptr self( reinterpret_cast(cls->tp_alloc(cls, 0))); if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr; self->record_position.emplace(pos); return self.release(); } static PyObject* RecordPositionRepr(PyRecordPositionObject* self) { const RecordPosition pos = PythonUnlocked([&] { return self->record_position->get(); }); return StringToPython(absl::StrCat("RecordPosition(", pos.chunk_begin(), ", ", pos.record_index(), ")")) .release(); } static PyObject* RecordPositionToBytes(PyRecordPositionObject* self, PyObject* args) { const RecordPosition pos = PythonUnlocked([&] { return self->record_position->get(); }); return BytesToPython(pos.ToBytes()).release(); } static PyRecordPositionObject* RecordPositionFromBytes(PyTypeObject* cls, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"serialized", nullptr}; PyObject* serialized_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:from_bytes", const_cast(keywords), &serialized_arg))) { return nullptr; } BytesLike serialized; if (ABSL_PREDICT_FALSE(!serialized.FromPython(serialized_arg))) { return nullptr; } RecordPosition pos; if (ABSL_PREDICT_FALSE(!pos.FromBytes(serialized))) { PyErr_SetString(PyExc_ValueError, "RecordPosition.from_bytes() failed"); return nullptr; } std::unique_ptr self( reinterpret_cast(cls->tp_alloc(cls, 0))); if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr; self->record_position.emplace(pos); return self.release(); } } // extern "C" const PyMethodDef RecordPositionMethods[] = { {"from_str", reinterpret_cast(RecordPositionFromStr), METH_VARARGS | METH_KEYWORDS | METH_CLASS, R"doc( from_str(type, serialized: str | bytes) -> RecordPosition Parses RecordPosition from its text format. Args: serialized: Text string to parse. 
)doc"}, {"to_bytes", reinterpret_cast(RecordPositionToBytes), METH_NOARGS, R"doc( to_bytes(self) -> bytes Returns the RecordPosition serialized to its binary format. Serialized byte strings have the same natural order as the corresponding positions. )doc"}, {"from_bytes", reinterpret_cast(RecordPositionFromBytes), METH_VARARGS | METH_KEYWORDS | METH_CLASS, R"doc( from_bytes( type, serialized: bytes | bytearray | memoryview) -> RecordPosition Parses RecordPosition from its binary format. Serialized byte strings have the same natural order as the corresponding positions. Args: serialized: Byte string to parse. )doc"}, {nullptr, nullptr, 0, nullptr}, }; const PyGetSetDef RecordPositionGetSet[] = { {const_cast("chunk_begin"), reinterpret_cast(RecordPositionChunkBegin), nullptr, const_cast(R"doc( chunk_begin: int File position of the beginning of the chunk containing the given record. )doc"), nullptr}, {const_cast("record_index"), reinterpret_cast(RecordPositionRecordIndex), nullptr, const_cast(R"doc( record_index: int Index of the record within the chunk. )doc"), nullptr}, {const_cast("numeric"), reinterpret_cast(RecordPositionNumeric), nullptr, const_cast(R"doc( numeric: int Converts RecordPosition to an integer scaled between 0 and file size. Distinct RecordPositions of a valid file have distinct numeric values. 
)doc"), nullptr}, {nullptr, nullptr, nullptr, nullptr, nullptr}}; PyTypeObject PyRecordPosition_Type = { // clang-format off PyVarObject_HEAD_INIT(&PyType_Type, 0) // clang-format on "riegeli.records.record_position.RecordPosition", // tp_name sizeof(PyRecordPositionObject), // tp_basicsize 0, // tp_itemsize reinterpret_cast(RecordPositionDestructor), // tp_dealloc #if PY_VERSION_HEX >= 0x03080000 0, // tp_vectorcall_offset #else nullptr, // tp_print #endif nullptr, // tp_getattr nullptr, // tp_setattr nullptr, // tp_as_async reinterpret_cast(RecordPositionRepr), // tp_repr nullptr, // tp_as_number nullptr, // tp_as_sequence nullptr, // tp_as_mapping reinterpret_cast(RecordPositionHash), // tp_hash nullptr, // tp_call reinterpret_cast(RecordPositionStr), // tp_str nullptr, // tp_getattro nullptr, // tp_setattro nullptr, // tp_as_buffer Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags R"doc( RecordPosition(chunk_begin: int, record_index: int) -> RecordPosition Represents a position in a Riegeli/records file. There are two ways of expressing positions, both strictly monotonic: * RecordPosition - Faster for seeking. * int - Scaled between 0 and file size. RecordPosition can be converted to int by the numeric property. Working with RecordPosition is recommended, unless it is needed to seek to an approximate position interpolated along the file, e.g. for splitting the file into shards, or unless the position must be expressed as an integer from the range [0, file_size] in order to fit into a preexisting API. Both RecordReader and RecordWriter return positions. A position from RecordWriter can act as a future: accessing its contents for the first time might block, waiting for pending operations to complete. 
)doc", // tp_doc nullptr, // tp_traverse nullptr, // tp_clear RecordPositionCompare, // tp_richcompare 0, // tp_weaklistoffset nullptr, // tp_iter nullptr, // tp_iternext const_cast(RecordPositionMethods), // tp_methods nullptr, // tp_members const_cast(RecordPositionGetSet), // tp_getset nullptr, // tp_base nullptr, // tp_dict nullptr, // tp_descr_get nullptr, // tp_descr_set 0, // tp_dictoffset nullptr, // tp_init nullptr, // tp_alloc reinterpret_cast(RecordPositionNew), // tp_new nullptr, // tp_free nullptr, // tp_is_gc nullptr, // tp_bases nullptr, // tp_mro nullptr, // tp_cache nullptr, // tp_subclasses nullptr, // tp_weaklist nullptr, // tp_del 0, // tp_version_tag nullptr, // tp_finalize }; PythonPtr RecordPositionToPython(FutureRecordPosition value) { PythonPtr self(PyRecordPosition_Type.tp_alloc(&PyRecordPosition_Type, 0)); if (ABSL_PREDICT_FALSE(self == nullptr)) return nullptr; reinterpret_cast(self.get()) ->record_position.emplace(std::move(value)); return self; } std::optional RecordPositionFromPython(PyObject* object) { if (ABSL_PREDICT_FALSE(!PyObject_TypeCheck(object, &PyRecordPosition_Type))) { PyErr_Format(PyExc_TypeError, "Expected RecordPosition, not %s", Py_TYPE(object)->tp_name); return std::nullopt; } return PythonUnlocked([&] { return reinterpret_cast(object) ->record_position->get(); }); } const char* const kModuleName = "riegeli.records.record_position"; const char kModuleDoc[] = R"doc(Represents a position in a Riegeli/records file.)doc"; PyModuleDef kModuleDef = { PyModuleDef_HEAD_INIT, kModuleName, // m_name kModuleDoc, // m_doc -1, // m_size nullptr, // m_methods nullptr, // m_slots nullptr, // m_traverse nullptr, // m_clear nullptr, // m_free }; PyObject* InitModule() { if (ABSL_PREDICT_FALSE(PyType_Ready(&PyRecordPosition_Type) < 0)) { return nullptr; } PythonPtr module(PyModule_Create(&kModuleDef)); if (ABSL_PREDICT_FALSE(module == nullptr)) return nullptr; Py_INCREF(&PyRecordPosition_Type); if 
(ABSL_PREDICT_FALSE(PyModule_AddObject(module.get(), "RecordPosition", reinterpret_cast( &PyRecordPosition_Type)) < 0)) { return nullptr; } static constexpr RecordPositionApi kRecordPositionApi = { RecordPositionToPython, RecordPositionFromPython, }; if (ABSL_PREDICT_FALSE(!ExportCapsule( module.get(), kRecordPositionCapsuleName, &kRecordPositionApi))) { return nullptr; } return module.release(); } } // namespace PyMODINIT_FUNC PyInit_record_position() { return InitModule(); } } // namespace riegeli::python ================================================ FILE: python/riegeli/records/record_position.h ================================================ // Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_ #define PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_ // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #include // clang-format: do not reorder the above include. 
#include #include "python/riegeli/base/utils.h" #include "riegeli/records/record_position.h" namespace riegeli::python { // Access the API thus: // ``` // static constexpr ImportedCapsule kRecordPositionApi( // kRecordPositionCapsuleName); // ``` struct RecordPositionApi { PythonPtr (*RecordPositionToPython)(FutureRecordPosition value); std::optional (*RecordPositionFromPython)(PyObject* object); }; inline constexpr const char* kRecordPositionCapsuleName = "riegeli.records.record_position._CPPAPI"; } // namespace riegeli::python #endif // PYTHON_RIEGELI_RECORDS_RECORD_POSITION_H_ ================================================ FILE: python/riegeli/records/record_reader.cc ================================================ // Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #define PY_SSIZE_T_CLEAN #include // clang-format: do not reorder the above include. 
#include #include #include #include #include #include "absl/base/optimization.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "python/riegeli/base/utils.h" #include "python/riegeli/bytes/python_reader.h" #include "python/riegeli/records/record_position.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/chain.h" #include "riegeli/base/compare.h" #include "riegeli/base/types.h" #include "riegeli/chunk_encoding/field_projection.h" #include "riegeli/records/record_position.h" #include "riegeli/records/record_reader.h" #include "riegeli/records/skipped_region.h" namespace riegeli::python { namespace { constexpr ImportedCapsule kRecordPositionApi( kRecordPositionCapsuleName); // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. extern "C" { static PyObject* GetRecordType(PyObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"metadata", nullptr}; PyObject* metadata_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:get_record_type", const_cast(keywords), &metadata_arg))) { return nullptr; } // record_type_name = metadata.record_type_name static constexpr Identifier id_record_type_name("record_type_name"); const PythonPtr record_type_name( PyObject_GetAttr(metadata_arg, id_record_type_name.get())); if (ABSL_PREDICT_FALSE(record_type_name == nullptr)) return nullptr; // if not record_type_name: return None const int record_type_name_is_true = PyObject_IsTrue(record_type_name.get()); if (ABSL_PREDICT_FALSE(record_type_name_is_true < 0)) return nullptr; if (record_type_name_is_true == 0) Py_RETURN_NONE; // file_descriptors = metadata.file_descriptor static constexpr Identifier id_file_descriptor("file_descriptor"); const PythonPtr file_descriptors( PyObject_GetAttr(metadata_arg, id_file_descriptor.get())); if 
(ABSL_PREDICT_FALSE(file_descriptors == nullptr)) return nullptr; // if not file_descriptors: return None const int file_descriptors_is_true = PyObject_IsTrue(file_descriptors.get()); if (ABSL_PREDICT_FALSE(file_descriptors_is_true < 0)) return nullptr; if (file_descriptors_is_true == 0) Py_RETURN_NONE; // pool = DescriptorPool() static constexpr ImportedConstant kDescriptorPool( "google.protobuf.descriptor_pool", "DescriptorPool"); if (ABSL_PREDICT_FALSE(!kDescriptorPool.Verify())) return nullptr; const PythonPtr pool( PyObject_CallFunctionObjArgs(kDescriptorPool.get(), nullptr)); if (ABSL_PREDICT_FALSE(pool == nullptr)) return nullptr; // for file_descriptor in file_descriptors: // pool.Add(file_descriptor) const PythonPtr iter(PyObject_GetIter(file_descriptors.get())); if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr; while (const PythonPtr file_descriptor{PyIter_Next(iter.get())}) { static constexpr Identifier id_Add("Add"); const PythonPtr add_result(PyObject_CallMethodObjArgs( pool.get(), id_Add.get(), file_descriptor.get(), nullptr)); if (ABSL_PREDICT_FALSE(add_result == nullptr)) return nullptr; } if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return nullptr; // message_descriptor = pool.FindMessageTypeByName(record_type_name) static constexpr Identifier id_FindMessageTypeByName("FindMessageTypeByName"); const PythonPtr message_descriptor( PyObject_CallMethodObjArgs(pool.get(), id_FindMessageTypeByName.get(), record_type_name.get(), nullptr)); if (ABSL_PREDICT_FALSE(message_descriptor == nullptr)) return nullptr; // return GetMessageClass(message_descriptor) const PythonPtr message_factory( PyImport_ImportModule("google.protobuf.message_factory")); if (ABSL_PREDICT_FALSE(message_factory == nullptr)) return nullptr; static constexpr Identifier id_GetMessageClass("GetMessageClass"); return PyObject_CallMethodObjArgs(message_factory.get(), id_GetMessageClass.get(), message_descriptor.get(), nullptr); } } // extern "C" struct PyRecordReaderObject { 
// clang-format off PyObject_HEAD static_assert(true, ""); // clang-format workaround. // clang-format on PythonWrapped> record_reader; PyObject* recovery; PythonWrapped recovery_exception; }; extern PyTypeObject PyRecordReader_Type; struct PyRecordIterObject { // clang-format off PyObject_HEAD static_assert(true, ""); // clang-format workaround. // clang-format on PyObject* (*read_record)(PyRecordReaderObject* self, PyObject* args); PyRecordReaderObject* record_reader; PyObject* args; }; extern PyTypeObject PyRecordIter_Type; bool RecordReaderHasException(PyRecordReaderObject* self) { return self->recovery_exception.has_value() || !self->record_reader->ok(); } void SetExceptionFromRecordReader(PyRecordReaderObject* self) { if (self->recovery_exception.has_value()) { self->recovery_exception->Restore(); return; } RIEGELI_ASSERT(!self->record_reader->ok()) << "Failed precondition of SetExceptionFromRecordReader(): " "RecordReader OK"; if (!self->record_reader->src().exception().ok()) { self->record_reader->src().exception().Restore(); return; } SetRiegeliError(self->record_reader->status()); } std::optional VerifyFieldNumber(long field_number_value) { static_assert(Field::kExistenceOnly == 0, "VerifyFieldNumber() assumes that Field::kExistenceOnly == 0"); if (ABSL_PREDICT_FALSE(field_number_value < Field::kExistenceOnly || field_number_value > (1 << 29) - 1)) { PyErr_Format(PyExc_OverflowError, "Field number out of range: %ld", field_number_value); return std::nullopt; } return IntCast(field_number_value); } std::optional FieldNumberFromPython(PyObject* object) { if (ABSL_PREDICT_FALSE(!PyLong_Check(object))) { PyErr_Format(PyExc_TypeError, "Expected int, not %s", Py_TYPE(object)->tp_name); return std::nullopt; } const long field_number_value = PyLong_AsLong(object); if (ABSL_PREDICT_FALSE(field_number_value == -1) && PyErr_Occurred()) { return std::nullopt; } return VerifyFieldNumber(field_number_value); } std::optional FieldProjectionFromPython(PyObject* object) { 
FieldProjection field_projection; const PythonPtr field_iter(PyObject_GetIter(object)); if (ABSL_PREDICT_FALSE(field_iter == nullptr)) return std::nullopt; while (const PythonPtr field_object{PyIter_Next(field_iter.get())}) { Field field; const PythonPtr field_number_iter(PyObject_GetIter(field_object.get())); if (ABSL_PREDICT_FALSE(field_number_iter == nullptr)) return std::nullopt; while (const PythonPtr field_number_object{ PyIter_Next(field_number_iter.get())}) { const std::optional field_number = FieldNumberFromPython(field_number_object.get()); if (ABSL_PREDICT_FALSE(field_number == std::nullopt)) return std::nullopt; field.AddFieldNumber(*field_number); } if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return std::nullopt; field_projection.AddField(std::move(field)); } if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return std::nullopt; return field_projection; } // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. 
extern "C" { static void RecordReaderDestructor(PyRecordReaderObject* self) { PyObject_GC_UnTrack(self); #if PY_VERSION_HEX < 0x030D0000 // < 3.13 Py_TRASHCAN_BEGIN(self, RecordReaderDestructor); #endif PythonUnlocked([&] { self->record_reader.reset(); }); Py_XDECREF(self->recovery); self->recovery_exception.reset(); Py_TYPE(self)->tp_free(self); #if PY_VERSION_HEX < 0x030D0000 // < 3.13 Py_TRASHCAN_END; #endif } static int RecordReaderTraverse(PyRecordReaderObject* self, visitproc visit, void* arg) { Py_VISIT(self->recovery); if (self->recovery_exception.has_value()) { const int recovery_exception_result = self->recovery_exception->Traverse(visit, arg); if (ABSL_PREDICT_FALSE(recovery_exception_result != 0)) { return recovery_exception_result; } } if (self->record_reader.has_value()) { return self->record_reader->src().Traverse(visit, arg); } return 0; } static int RecordReaderClear(PyRecordReaderObject* self) { PythonUnlocked([&] { self->record_reader.reset(); }); Py_CLEAR(self->recovery); self->recovery_exception.reset(); return 0; } static int RecordReaderInit(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"src", "owns_src", "assumed_pos", "min_buffer_size", "max_buffer_size", "buffer_size", "field_projection", "recovery", nullptr}; PyObject* src_arg; PyObject* owns_src_arg = nullptr; PyObject* assumed_pos_arg = nullptr; PyObject* min_buffer_size_arg = nullptr; PyObject* max_buffer_size_arg = nullptr; PyObject* buffer_size_arg = nullptr; PyObject* field_projection_arg = nullptr; PyObject* recovery_arg = nullptr; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O|$OOOOOOO:RecordReader", const_cast(keywords), &src_arg, &owns_src_arg, &assumed_pos_arg, &min_buffer_size_arg, &max_buffer_size_arg, &buffer_size_arg, &field_projection_arg, &recovery_arg))) { return -1; } PythonReader::Options python_reader_options; python_reader_options.set_owns_src(true); if (owns_src_arg != nullptr) { 
const int owns_src_is_true = PyObject_IsTrue(owns_src_arg); if (ABSL_PREDICT_FALSE(owns_src_is_true < 0)) return -1; python_reader_options.set_owns_src(owns_src_is_true != 0); } if (assumed_pos_arg != nullptr && assumed_pos_arg != Py_None) { const std::optional assumed_pos = PositionFromPython(assumed_pos_arg); if (ABSL_PREDICT_FALSE(assumed_pos == std::nullopt)) return -1; python_reader_options.set_assumed_pos(*assumed_pos); } if (buffer_size_arg != nullptr && buffer_size_arg != Py_None) { min_buffer_size_arg = buffer_size_arg; max_buffer_size_arg = buffer_size_arg; } if (min_buffer_size_arg != nullptr) { const std::optional min_buffer_size = SizeFromPython(min_buffer_size_arg); if (ABSL_PREDICT_FALSE(min_buffer_size == std::nullopt)) return -1; python_reader_options.set_min_buffer_size(*min_buffer_size); } if (max_buffer_size_arg != nullptr) { const std::optional max_buffer_size = SizeFromPython(max_buffer_size_arg); if (ABSL_PREDICT_FALSE(max_buffer_size == std::nullopt)) return -1; python_reader_options.set_max_buffer_size(*max_buffer_size); } RecordReaderBase::Options record_reader_options; if (field_projection_arg != nullptr && field_projection_arg != Py_None) { std::optional field_projection = FieldProjectionFromPython(field_projection_arg); if (ABSL_PREDICT_FALSE(field_projection == std::nullopt)) return -1; record_reader_options.set_field_projection(*std::move(field_projection)); } if (recovery_arg != nullptr && recovery_arg != Py_None) { Py_INCREF(recovery_arg); Py_XDECREF(self->recovery); self->recovery = recovery_arg; record_reader_options.set_recovery([self]( const SkippedRegion& skipped_region, RecordReaderBase& record_reader) { PythonLock lock; const PythonPtr begin_object = PositionToPython(skipped_region.begin()); if (ABSL_PREDICT_FALSE(begin_object == nullptr)) { self->recovery_exception.emplace(Exception::Fetch()); return false; } const PythonPtr end_object = PositionToPython(skipped_region.end()); if (ABSL_PREDICT_FALSE(end_object == nullptr)) { 
self->recovery_exception.emplace(Exception::Fetch()); return false; } const PythonPtr message_object = StringToPython(skipped_region.message()); if (ABSL_PREDICT_FALSE(message_object == nullptr)) { self->recovery_exception.emplace(Exception::Fetch()); return false; } static constexpr ImportedConstant kSkippedRegion( "riegeli.records.skipped_region", "SkippedRegion"); if (ABSL_PREDICT_FALSE(!kSkippedRegion.Verify())) { self->recovery_exception.emplace(Exception::Fetch()); return false; } const PythonPtr skipped_region_object(PyObject_CallFunctionObjArgs( kSkippedRegion.get(), begin_object.get(), end_object.get(), message_object.get(), nullptr)); if (ABSL_PREDICT_FALSE(skipped_region_object == nullptr)) { self->recovery_exception.emplace(Exception::Fetch()); return false; } const PythonPtr recovery_result(PyObject_CallFunctionObjArgs( self->recovery, skipped_region_object.get(), nullptr)); if (ABSL_PREDICT_FALSE(recovery_result == nullptr)) { if (PyErr_ExceptionMatches(PyExc_StopIteration)) { PyErr_Clear(); } else { self->recovery_exception.emplace(Exception::Fetch()); } return false; } return true; }); } PythonReader python_reader(src_arg, std::move(python_reader_options)); PythonUnlocked([&] { self->record_reader.emplace(std::move(python_reader), std::move(record_reader_options)); }); if (ABSL_PREDICT_FALSE(!self->record_reader->ok())) { self->record_reader->src().Close(); SetExceptionFromRecordReader(self); return -1; } return 0; } static PyObject* RecordReaderSrc(PyRecordReaderObject* self, void* closure) { PyObject* const src = ABSL_PREDICT_FALSE(!self->record_reader.has_value()) ? Py_None : self->record_reader->src().src(); Py_INCREF(src); return src; } static PyObject* RecordReaderRepr(PyRecordReaderObject* self) { const PythonPtr format = StringToPython(""); if (ABSL_PREDICT_FALSE(format == nullptr)) return nullptr; // return format.format(self.src) PyObject* const src = ABSL_PREDICT_FALSE(!self->record_reader.has_value()) ? 
Py_None : self->record_reader->src().src(); static constexpr Identifier id_format("format"); return PyObject_CallMethodObjArgs(format.get(), id_format.get(), src, nullptr); } static PyObject* RecordReaderEnter(PyObject* self, PyObject* args) { // return self Py_INCREF(self); return self; } static PyObject* RecordReaderExit(PyRecordReaderObject* self, PyObject* args) { PyObject* exc_type; PyObject* exc_value; PyObject* traceback; if (ABSL_PREDICT_FALSE(!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &traceback))) { return nullptr; } // self.close(), suppressing exceptions if exc_type != None. if (ABSL_PREDICT_TRUE(self->record_reader.has_value())) { const bool close_ok = PythonUnlocked([&] { return self->record_reader->Close(); }); if (ABSL_PREDICT_FALSE(!close_ok) && exc_type == Py_None) { SetExceptionFromRecordReader(self); return nullptr; } } Py_RETURN_FALSE; } static PyObject* RecordReaderClose(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_TRUE(self->record_reader.has_value())) { const bool close_ok = PythonUnlocked([&] { return self->record_reader->Close(); }); if (ABSL_PREDICT_FALSE(!close_ok)) { SetExceptionFromRecordReader(self); return nullptr; } } Py_RETURN_NONE; } static PyObject* RecordReaderCheckFileFormat(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; const bool check_file_format_ok = PythonUnlocked([&] { return self->record_reader->CheckFileFormat(); }); if (ABSL_PREDICT_FALSE(!check_file_format_ok)) { if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_FALSE; } Py_RETURN_TRUE; } static PyObject* RecordReaderReadMetadata(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; Chain metadata; const bool read_serialized_metadata_ok = PythonUnlocked( [&] { return self->record_reader->ReadSerializedMetadata(metadata); }); if 
(ABSL_PREDICT_FALSE(!read_serialized_metadata_ok)) { if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_NONE; } const PythonPtr serialized_metadata = ChainToPython(metadata); if (ABSL_PREDICT_FALSE(serialized_metadata == nullptr)) return nullptr; // return RecordsMetadata.FromString(serialized_metadata) static constexpr ImportedConstant kRecordsMetadata( "riegeli.records.records_metadata_pb2", "RecordsMetadata"); if (ABSL_PREDICT_FALSE(!kRecordsMetadata.Verify())) return nullptr; static constexpr ImportedConstant kDecodeError("google.protobuf.message", "DecodeError"); if (ABSL_PREDICT_FALSE(!kDecodeError.Verify())) return nullptr; static constexpr Identifier id_FromString("FromString"); PythonPtr metadata_object( PyObject_CallMethodObjArgs(kRecordsMetadata.get(), id_FromString.get(), serialized_metadata.get(), nullptr)); if (ABSL_PREDICT_FALSE(metadata_object == nullptr)) { if (self->record_reader->recovery() != nullptr && PyErr_ExceptionMatches(kDecodeError.get())) { const Exception exception = Exception::Fetch(); if (self->record_reader->recovery()( SkippedRegion(self->record_reader->last_pos().chunk_begin(), self->record_reader->pos().numeric(), exception.message()), *self->record_reader)) { // Recovered metadata decoding, assume empty `RecordsMetadata`. 
return PyObject_CallFunctionObjArgs(kRecordsMetadata.get(), nullptr); } if (ABSL_PREDICT_FALSE(self->recovery_exception.has_value())) { self->recovery_exception->Restore(); return nullptr; } Py_RETURN_NONE; } return nullptr; } return metadata_object.release(); } static PyObject* RecordReaderReadSerializedMetadata(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; Chain metadata; const bool read_serialized_metadata_ok = PythonUnlocked( [&] { return self->record_reader->ReadSerializedMetadata(metadata); }); if (ABSL_PREDICT_FALSE(!read_serialized_metadata_ok)) { if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_NONE; } return ChainToPython(metadata).release(); } static PyObject* RecordReaderReadRecord(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; Chain record; const bool read_record_ok = PythonUnlocked([&] { return self->record_reader->ReadRecord(record); }); if (ABSL_PREDICT_FALSE(!read_record_ok)) { if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_NONE; } return ChainToPython(record).release(); } static PyObject* RecordReaderReadMessage(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"message_type", nullptr}; PyObject* message_type_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:read_message", const_cast(keywords), &message_type_arg))) { return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; absl::string_view record; for (;;) { const bool read_record_ok = PythonUnlocked([&] { return self->record_reader->ReadRecord(record); }); if (ABSL_PREDICT_FALSE(!read_record_ok)) { if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) { SetExceptionFromRecordReader(self); return 
nullptr; } Py_RETURN_NONE; } MemoryView memory_view; PyObject* const record_object = memory_view.ToPython(record); if (ABSL_PREDICT_FALSE(record_object == nullptr)) return nullptr; static constexpr ImportedConstant kDecodeError("google.protobuf.message", "DecodeError"); if (ABSL_PREDICT_FALSE(!kDecodeError.Verify())) return nullptr; // return message_type.FromString(record) static constexpr Identifier id_FromString("FromString"); PythonPtr message(PyObject_CallMethodObjArgs( message_type_arg, id_FromString.get(), record_object, nullptr)); if (ABSL_PREDICT_FALSE(message == nullptr)) { if (self->record_reader->recovery() != nullptr && PyErr_ExceptionMatches(kDecodeError.get())) { const Exception exception = Exception::Fetch(); if (ABSL_PREDICT_FALSE(!memory_view.Release())) return nullptr; if (self->record_reader->recovery()( SkippedRegion(self->record_reader->last_pos().numeric(), self->record_reader->pos().numeric(), exception.message()), *self->record_reader)) { continue; } if (ABSL_PREDICT_FALSE(self->recovery_exception.has_value())) { self->recovery_exception->Restore(); return nullptr; } Py_RETURN_NONE; } return nullptr; } if (ABSL_PREDICT_FALSE(!memory_view.Release())) return nullptr; return message.release(); } } static PyRecordIterObject* RecordReaderReadRecords(PyRecordReaderObject* self, PyObject* args) { std::unique_ptr iter( PyObject_GC_New(PyRecordIterObject, &PyRecordIter_Type)); if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr; iter->read_record = [](PyRecordReaderObject* self, PyObject* args) { return RecordReaderReadRecord(self, args); }; Py_INCREF(self); iter->record_reader = self; iter->args = nullptr; return iter.release(); } static PyRecordIterObject* RecordReaderReadMessages(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"message_type", nullptr}; PyObject* message_type_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:read_messages", 
const_cast(keywords), &message_type_arg))) { return nullptr; } std::unique_ptr iter( PyObject_GC_New(PyRecordIterObject, &PyRecordIter_Type)); if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr; iter->read_record = [](PyRecordReaderObject* self, PyObject* args) { return RecordReaderReadMessage(self, args, nullptr); }; Py_INCREF(self); iter->record_reader = self; iter->args = PyTuple_Pack(1, message_type_arg); if (ABSL_PREDICT_FALSE(iter->args == nullptr)) return nullptr; return iter.release(); } static PyObject* RecordReaderSetFieldProjection(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"field_projection", nullptr}; PyObject* field_projection_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:set_field_projection", const_cast(keywords), &field_projection_arg))) { return nullptr; } std::optional field_projection; if (field_projection_arg == Py_None) { field_projection = FieldProjection::All(); } else { field_projection = FieldProjectionFromPython(field_projection_arg); if (ABSL_PREDICT_FALSE(field_projection == std::nullopt)) return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; const bool set_field_projection_ok = PythonUnlocked([&] { return self->record_reader->SetFieldProjection( *std::move(field_projection)); }); if (ABSL_PREDICT_FALSE(!set_field_projection_ok)) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_NONE; } static PyObject* RecordReaderLastPos(PyRecordReaderObject* self, void* closure) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr; if (ABSL_PREDICT_FALSE(!self->record_reader->last_record_is_valid())) { SetRiegeliError(absl::FailedPreconditionError("No record was read")); return nullptr; } return kRecordPositionApi ->RecordPositionToPython(self->record_reader->last_pos()) .release(); } static PyObject* 
RecordReaderPos(PyRecordReaderObject* self, void* closure) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr; return kRecordPositionApi->RecordPositionToPython(self->record_reader->pos()) .release(); } static PyObject* RecordReaderSupportsRandomAccess(PyRecordReaderObject* self, void* closure) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; return PyBool_FromLong(self->record_reader->SupportsRandomAccess()); } static PyObject* RecordReaderSeek(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"pos", nullptr}; PyObject* pos_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:seek", const_cast(keywords), &pos_arg))) { return nullptr; } if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr; const std::optional pos = kRecordPositionApi->RecordPositionFromPython(pos_arg); if (ABSL_PREDICT_FALSE(pos == std::nullopt)) return nullptr; if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; const bool seek_ok = PythonUnlocked([&] { return self->record_reader->Seek(*pos); }); if (ABSL_PREDICT_FALSE(!seek_ok)) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_NONE; } static PyObject* RecordReaderSeekNumeric(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"pos", nullptr}; PyObject* pos_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:seek_numeric", const_cast(keywords), &pos_arg))) { return nullptr; } const std::optional pos = PositionFromPython(pos_arg); if (ABSL_PREDICT_FALSE(pos == std::nullopt)) return nullptr; if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; const bool seek_ok = PythonUnlocked([&] { return self->record_reader->Seek(*pos); }); if (ABSL_PREDICT_FALSE(!seek_ok)) { SetExceptionFromRecordReader(self); return nullptr; 
} Py_RETURN_NONE; } static PyObject* RecordReaderSeekBack(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; const bool seek_back_ok = PythonUnlocked([&] { return self->record_reader->SeekBack(); }); if (ABSL_PREDICT_FALSE(!seek_back_ok)) { if (ABSL_PREDICT_FALSE(RecordReaderHasException(self))) { SetExceptionFromRecordReader(self); return nullptr; } Py_RETURN_FALSE; } Py_RETURN_TRUE; } static PyObject* RecordReaderSize(PyRecordReaderObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; const std::optional size = PythonUnlocked([&] { return self->record_reader->Size(); }); if (ABSL_PREDICT_FALSE(size == std::nullopt)) { SetExceptionFromRecordReader(self); return nullptr; } return PositionToPython(*size).release(); } static PyObject* RecordReaderSearch(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"test", nullptr}; PyObject* test_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:search", const_cast(keywords), &test_arg))) { return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; std::optional test_exception; const std::optional result = PythonUnlocked([&] { return self->record_reader->Search( [&](RecordReaderBase&) -> std::optional { PythonLock lock; const PythonPtr test_result( PyObject_CallFunctionObjArgs(test_arg, self, nullptr)); if (ABSL_PREDICT_FALSE(test_result == nullptr)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } const std::optional ordering = PartialOrderingFromPython(test_result.get()); if (ABSL_PREDICT_FALSE(ordering == std::nullopt)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } return *ordering; }); }); if (ABSL_PREDICT_FALSE(result == std::nullopt)) { if (test_exception != std::nullopt) { test_exception->Restore(); } else { SetExceptionFromRecordReader(self); } return 
nullptr; } return PartialOrderingToPython(*result).release(); } static PyObject* RecordReaderSearchForRecord(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"test", nullptr}; PyObject* test_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:search_for_record", const_cast(keywords), &test_arg))) { return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; std::optional test_exception; const std::optional result = PythonUnlocked([&] { return self->record_reader->Search( [&](const Chain& record) -> std::optional { PythonLock lock; const PythonPtr record_object = ChainToPython(record); if (ABSL_PREDICT_FALSE(record_object == nullptr)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } const PythonPtr test_result(PyObject_CallFunctionObjArgs( test_arg, record_object.get(), nullptr)); if (ABSL_PREDICT_FALSE(test_result == nullptr)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } const std::optional ordering = PartialOrderingFromPython(test_result.get()); if (ABSL_PREDICT_FALSE(ordering == std::nullopt)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } return *ordering; }); }); if (ABSL_PREDICT_FALSE(result == std::nullopt)) { if (test_exception != std::nullopt) { test_exception->Restore(); if (PyErr_ExceptionMatches(PyExc_StopIteration)) { PyErr_Clear(); Py_RETURN_NONE; } } else { SetExceptionFromRecordReader(self); } return nullptr; } return PartialOrderingToPython(*result).release(); } static PyObject* RecordReaderSearchForMessage(PyRecordReaderObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"message_type", "test", nullptr}; PyObject* message_type_arg; PyObject* test_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "OO:search_for_message", const_cast(keywords), &message_type_arg, &test_arg))) { return nullptr; } if 
(ABSL_PREDICT_FALSE(!self->record_reader.Verify())) return nullptr; static constexpr ImportedConstant kDecodeError("google.protobuf.message", "DecodeError"); if (ABSL_PREDICT_FALSE(!kDecodeError.Verify())) return nullptr; // `RecordReader::Search(test)` sets the recovery function to `nullptr` while // calling `test()`. Save it here to call it explicitly in `test()`. std::function recovery = self->record_reader->recovery(); std::optional test_exception; const std::optional result = PythonUnlocked([&] { return self->record_reader->Search( [&](absl::string_view record) -> std::optional { PythonLock lock; MemoryView memory_view; PyObject* const record_object = memory_view.ToPython(record); if (ABSL_PREDICT_FALSE(record_object == nullptr)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } // message = message_type.FromString(record) static constexpr Identifier id_FromString("FromString"); const PythonPtr message(PyObject_CallMethodObjArgs( message_type_arg, id_FromString.get(), record_object, nullptr)); if (ABSL_PREDICT_FALSE(message == nullptr)) { if (recovery != nullptr && PyErr_ExceptionMatches(kDecodeError.get())) { const Exception exception = Exception::Fetch(); if (ABSL_PREDICT_FALSE(!memory_view.Release())) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } if (recovery( SkippedRegion(self->record_reader->last_pos().numeric(), self->record_reader->pos().numeric(), exception.message()), *self->record_reader)) { // Declare the skipped record unordered. return PartialOrdering::unordered; } if (ABSL_PREDICT_FALSE(self->recovery_exception.has_value())) { return std::nullopt; } // Cancel the search. 
PyErr_SetNone(PyExc_StopIteration); } test_exception.emplace(Exception::Fetch()); return std::nullopt; } if (ABSL_PREDICT_FALSE(!memory_view.Release())) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } const PythonPtr test_result( PyObject_CallFunctionObjArgs(test_arg, message.get(), nullptr)); if (ABSL_PREDICT_FALSE(test_result == nullptr)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } const std::optional ordering = PartialOrderingFromPython(test_result.get()); if (ABSL_PREDICT_FALSE(ordering == std::nullopt)) { test_exception.emplace(Exception::Fetch()); return std::nullopt; } return *ordering; }); }); if (ABSL_PREDICT_FALSE(result == std::nullopt)) { if (test_exception != std::nullopt) { test_exception->Restore(); if (PyErr_ExceptionMatches(PyExc_StopIteration)) { PyErr_Clear(); Py_RETURN_NONE; } } else { SetExceptionFromRecordReader(self); } return nullptr; } return PartialOrderingToPython(*result).release(); } } // extern "C" const PyMethodDef RecordReaderMethods[] = { {"__enter__", RecordReaderEnter, METH_NOARGS, R"doc( __enter__(self) -> RecordReader Returns self. )doc"}, {"__exit__", reinterpret_cast(RecordReaderExit), METH_VARARGS, R"doc( __exit__(self, exc_type, exc_value, traceback) -> bool Calls close(). Suppresses exceptions from close() if an exception is already in flight. Args: exc_type: None or exception in flight (type). exc_value: None or exception in flight (value). traceback: None or exception in flight (traceback). )doc"}, {"close", reinterpret_cast(RecordReaderClose), METH_NOARGS, R"doc( close(self) -> None Indicates that reading is done. Verifies that the file is not truncated at the current position, i.e. that it either has more data or ends cleanly. Marks the RecordReader as closed, disallowing further reading. If the RecordReader was failed, raises the same exception again. If the RecordReader was not failed but already closed, does nothing. 
)doc"}, {"check_file_format", reinterpret_cast(RecordReaderCheckFileFormat), METH_NOARGS, R"doc( check_file_format(self) -> bool Ensures that the file looks like a valid Riegeli/Records file. Reading functions already check the file format. check_file_format() can verify the file format before (or instead of) performing other operations. This ignores the recovery function. If invalid file contents are skipped, then checking the file format is meaningless: any file can be read. Returns: True if this looks like a Riegeli/records file. False if the file ends before this could be determined. Raises: RiegeliError: If this is not a Riegeli/records file. )doc"}, {"read_metadata", reinterpret_cast(RecordReaderReadMetadata), METH_NOARGS, R"doc( read_metadata(self) -> RecordsMetadata | None Returns file metadata. Record type in metadata can be conveniently interpreted by get_record_type(). read_metadata() must be called while the RecordReader is at the beginning of the file (calling check_file_format() before is allowed). Returns: File metadata as parsed RecordsMetadata message, or None at end of file. )doc"}, {"read_serialized_metadata", reinterpret_cast(RecordReaderReadSerializedMetadata), METH_NOARGS, R"doc( read_serialized_metadata(self) -> bytes | None Returns file metadata. This is like read_metadata(), but metadata is returned in the serialized form. This is faster if the caller needs metadata already serialized. Returns: File metadata as serialized RecordsMetadata message, or None at end of file. )doc"}, {"read_record", reinterpret_cast(RecordReaderReadRecord), METH_NOARGS, R"doc( read_record(self) -> bytes | None Reads the next record. Returns: The record read as bytes, or None at end of file. )doc"}, {"read_message", reinterpret_cast(RecordReaderReadMessage), METH_VARARGS | METH_KEYWORDS, R"doc( read_message(self, message_type: type[Message]) -> Message | None Reads the next record. Args: message_type: Type of the message to parse the record as. 
Returns: The record read as a parsed message, or None at end of file. )doc"}, {"read_records", reinterpret_cast(RecordReaderReadRecords), METH_NOARGS, R"doc( read_records(self) -> Iterator[bytes] Returns an iterator which reads all remaining records. Yields: The next record read as bytes. )doc"}, {"read_messages", reinterpret_cast(RecordReaderReadMessages), METH_VARARGS | METH_KEYWORDS, R"doc( read_messages(self, message_type: type[Message]) -> Iterator[Message] Returns an iterator which reads all remaining records. Yields: The next record read as parsed message. )doc"}, {"set_field_projection", reinterpret_cast(RecordReaderSetFieldProjection), METH_VARARGS | METH_KEYWORDS, R"doc( set_field_projection( self, field_projection: Iterable[Iterable[int]] | None ) -> None Like field_projection constructor argument, but can be done at any time. Args: field_projection: If not None, the set of fields to be included in returned records, allowing to exclude the remaining fields (but does not guarantee that they will be excluded). Excluding data makes reading faster. Projection is effective if the file has been written with "transpose" in RecordWriter options. Additionally, "bucket_fraction" in RecordWriter options with a lower value can make reading with projection faster. A field projection is specified as an iterable of field paths. A field path is specified as an iterable of proto field numbers descending from the root message. A special field EXISTENCE_ONLY can be added to the end of the path; it preserves field existence but ignores its value; warning: for a repeated field this preserves the field count only if the field is not packed. )doc"}, {"seek", reinterpret_cast(RecordReaderSeek), METH_VARARGS | METH_KEYWORDS, R"doc( seek(self, pos: RecordPosition) -> None Seeks to a position. The position should have been obtained by pos for the same file. Args: pos: Seek target. 
)doc"}, {"seek_numeric", reinterpret_cast(RecordReaderSeekNumeric), METH_VARARGS | METH_KEYWORDS, R"doc( seek_numeric(self, pos: int) -> None Seeks to a position. The position can be any integer between 0 and file size. If it points between records, it is interpreted as the next record. Args: pos: Seek target. )doc"}, {"seek_back", reinterpret_cast(RecordReaderSeekBack), METH_NOARGS, R"doc( seek_back(self) -> bool Seeks back by one record. Returns: If successful, True. Returns False at the beginning of the file. )doc"}, {"size", reinterpret_cast(RecordReaderSize), METH_NOARGS, R"doc( size(self) -> int Returns the size of the file in bytes. This is the position corresponding to its end. )doc"}, {"search", reinterpret_cast(RecordReaderSearch), METH_VARARGS | METH_KEYWORDS, R"doc( search(self, test: Callable[[RecordReader], int | None]) -> None Searches the file for a desired record, or for a desired position between records, given that it is possible to determine whether a given record is before or after the desired position. The current position before calling search() does not matter. Args: test: A function which takes the RecordReader as a parameter, seeked to some record, and returns an int or None: * < 0: The current record is before the desired position. * == 0: The current record is desired, searching can stop. * > 0: The current record is after the desired position. * None: It could not be determined which is the case. The current record will be skipped. It can also raise StopIteration to cancel the search. Preconditions: * All < 0 records precede all == 0 records. * All == 0 records precede all > 0 records. * All < 0 records precede all > 0 records, even if there are no == 0 records. Return values: * 0: There is some == 0 record, and search() points to some such record. * 1: There are no == 0 records but there is some > 0 record, and search() points to the earliest such record. 
* -1: There are no == 0 nor > 0 records, but there is some < 0 record, and search() points to the end of file. * None: All records are None, and search() points to the end of file, or search() was cancelled. To find the earliest == 0 record instead of an arbitrary one, test() can be changed to return > 0 in place of == 0. Further guarantees: * If a test() returns == 0, search() points back to the record before test() and returns. * If a test() returns < 0, test() will not be called again at earlier positions. * If a test() returns > 0, test() will not be called again at later positions. * test() will not be called again at the same position. It follows that if a test() returns == 0 or > 0, search() points to the record before the last test() call with one of these results. This allows to communicate additional context of a == 0 or > 0 result by a side effect of test(). )doc"}, {"search_for_record", reinterpret_cast(RecordReaderSearchForRecord), METH_VARARGS | METH_KEYWORDS, R"doc( search_for_record(self, test: Callable[[bytes], int | None]) -> None A variant of search() which reads a record before calling test(), instead of letting test() read the record. Args: test: A function which takes the record read as bytes as a parameter, and returns an int or None, like in search(). )doc"}, {"search_for_message", reinterpret_cast(RecordReaderSearchForMessage), METH_VARARGS | METH_KEYWORDS, R"doc( search_for_message( self, message_type: type[Message], test: Callable[[Message], int | None] ) -> None A variant of search() which reads a record before calling test(), instead of letting test() read the record. Args: message_type: Type of the message to parse the record as. test: A function which takes the record read as a parsed message as a parameter, and returns an int or None, like in search(). 
)doc"}, {nullptr, nullptr, 0, nullptr}, }; const PyGetSetDef RecordReaderGetSet[] = { {const_cast("src"), reinterpret_cast(RecordReaderSrc), nullptr, const_cast(R"doc( src: BinaryIO Binary IO stream being read from. )doc"), nullptr}, {const_cast("last_pos"), reinterpret_cast(RecordReaderLastPos), nullptr, const_cast(R"doc( last_pos: RecordPosition The canonical position of the last record read. The canonical position is the largest among all equivalent positions. Seeking to any equivalent position leads to reading the same record. last_pos.numeric returns the position as an int. Precondition: a record was successfully read and there was no intervening call to close(), seek(), seek_numeric(), seek_back(), search(), search_for_record(), or search_for_message(). )doc"), nullptr}, {const_cast("pos"), reinterpret_cast(RecordReaderPos), nullptr, const_cast(R"doc( pos: RecordPosition A position of the next record. A position of the next record (or the end of file if there is no next record). A position which is not canonical can be smaller than the equivalent canonical position. Seeking to any equivalent position leads to reading the same record. pos.numeric returns the position as an int. pos is unchanged by close(). )doc"), nullptr}, {const_cast("supports_random_access"), reinterpret_cast(RecordReaderSupportsRandomAccess), nullptr, const_cast(R"doc( supports_random_access: bool True if this RecordReader supports random access. This includes seek(), seek_numeric(), and size(). 
)doc"), nullptr}, {nullptr, nullptr, nullptr, nullptr, nullptr}}; PyTypeObject PyRecordReader_Type = { // clang-format off PyVarObject_HEAD_INIT(&PyType_Type, 0) // clang-format on "riegeli.records.record_reader.RecordReader", // tp_name sizeof(PyRecordReaderObject), // tp_basicsize 0, // tp_itemsize reinterpret_cast(RecordReaderDestructor), // tp_dealloc #if PY_VERSION_HEX >= 0x03080000 0, // tp_vectorcall_offset #else nullptr, // tp_print #endif nullptr, // tp_getattr nullptr, // tp_setattr nullptr, // tp_as_async reinterpret_cast(RecordReaderRepr), // tp_repr nullptr, // tp_as_number nullptr, // tp_as_sequence nullptr, // tp_as_mapping nullptr, // tp_hash nullptr, // tp_call nullptr, // tp_str nullptr, // tp_getattro nullptr, // tp_setattro nullptr, // tp_as_buffer Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, // tp_flags R"doc( RecordReader( src: BinaryIO, *, owns_src: bool = True, assumed_pos: int | None = None, min_buffer_size: int = 4 << 10, max_buffer_size: int = 64 << 10, buffer_size: int | None, field_projection: Iterable[Iterable[int]] | None = None, recovery: Callable[[SkippedRegion], Any] | None = None) -> RecordReader Will read from the given file. Args: src: Binary IO stream to read from. owns_src: If True, src is owned, and close() or __exit__() calls src.close(). assumed_pos: If None, src must support random access, RecordReader will support random access, and RecordReader will set the position of src on close(). If an int, it is enough that src supports sequential access, and this position will be assumed initially. min_buffer_size: Tunes the minimal buffer size, which determines how much data at a time is typically read from src. The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. max_buffer_size: Tunes the maximal buffer size, which determines how much data at a time is typically read from src. 
The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. buffer_size: If not None, a shortcut for setting min_buffer_size and max_buffer_size to the same value. field_projection: If not None, the set of fields to be included in returned records, allowing to exclude the remaining fields (but does not guarantee that they will be excluded). Excluding data makes reading faster. Projection is effective if the file has been written with "transpose" in RecordWriter options. Additionally, "bucket_fraction" in RecordWriter options with a lower value can make reading with projection faster. A field projection is specified as an iterable of field paths. A field path is specified as an iterable of proto field numbers descending from the root message. A special field EXISTENCE_ONLY can be added to the end of the path; it preserves field existence but ignores its value; warning: for a repeated field this preserves the field count only if the field is not packed. recovery: If None, then invalid file contents cause RecordReader to raise RiegeliError. If not None, then invalid file contents cause RecordReader to skip over the invalid region and call this recovery function with a SkippedRegion as an argument. If the recovery function returns normally, reading continues. If the recovery function raises StopIteration, reading ends. If close() is called and file contents were truncated, the recovery function is called if set; the RecordReader remains closed. 
The src argument should be a binary IO stream which supports: * close() - for close() or __exit__() if owns_src * readinto1(memoryview) or readinto(memoryview) or read1(int) or read(int) * seek(int[, int]) - if assumed_pos is None, or for seek(), seek_numeric(), or size() * tell() - if assumed_pos is None, or for seek(), seek_numeric(), or size() Example values for src: * io.FileIO(filename, 'rb') * io.open(filename, 'rb') - better with buffering=0, or use io.FileIO() instead * open(filename, 'rb') - better with buffering=0, or use io.FileIO() instead * io.BytesIO(contents) * tf.io.gfile.GFile(filename, 'rb') Warning: if owns_src is False and assumed_pos is not None, src will have an unpredictable amount of extra data consumed because of buffering. )doc", // tp_doc reinterpret_cast(RecordReaderTraverse), // tp_traverse reinterpret_cast(RecordReaderClear), // tp_clear nullptr, // tp_richcompare 0, // tp_weaklistoffset nullptr, // tp_iter nullptr, // tp_iternext const_cast(RecordReaderMethods), // tp_methods nullptr, // tp_members const_cast(RecordReaderGetSet), // tp_getset nullptr, // tp_base nullptr, // tp_dict nullptr, // tp_descr_get nullptr, // tp_descr_set 0, // tp_dictoffset reinterpret_cast(RecordReaderInit), // tp_init nullptr, // tp_alloc PyType_GenericNew, // tp_new nullptr, // tp_free nullptr, // tp_is_gc nullptr, // tp_bases nullptr, // tp_mro nullptr, // tp_cache nullptr, // tp_subclasses nullptr, // tp_weaklist nullptr, // tp_del 0, // tp_version_tag nullptr, // tp_finalize }; // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. 
extern "C" { static void RecordIterDestructor(PyRecordIterObject* self) { PyObject_GC_UnTrack(self); #if PY_VERSION_HEX < 0x030D0000 // < 3.13 Py_TRASHCAN_BEGIN(self, RecordIterDestructor); #endif Py_XDECREF(self->record_reader); Py_XDECREF(self->args); Py_TYPE(self)->tp_free(self); #if PY_VERSION_HEX < 0x030D0000 // < 3.13 Py_TRASHCAN_END; #endif } static int RecordIterTraverse(PyRecordIterObject* self, visitproc visit, void* arg) { Py_VISIT(self->record_reader); Py_VISIT(self->args); return 0; } static int RecordIterClear(PyRecordIterObject* self) { Py_CLEAR(self->record_reader); Py_CLEAR(self->args); return 0; } static PyObject* RecordIterNext(PyRecordIterObject* self) { PythonPtr read_record_result( self->read_record(self->record_reader, self->args)); if (ABSL_PREDICT_FALSE(read_record_result.get() == Py_None)) return nullptr; return read_record_result.release(); } } // extern "C" PyTypeObject PyRecordIter_Type = { // clang-format off PyVarObject_HEAD_INIT(&PyType_Type, 0) // clang-format on "RecordIter", // tp_name sizeof(PyRecordIterObject), // tp_basicsize 0, // tp_itemsize reinterpret_cast(RecordIterDestructor), // tp_dealloc #if PY_VERSION_HEX >= 0x03080000 0, // tp_vectorcall_offset #else nullptr, // tp_print #endif nullptr, // tp_getattr nullptr, // tp_setattr nullptr, // tp_as_async nullptr, // tp_repr nullptr, // tp_as_number nullptr, // tp_as_sequence nullptr, // tp_as_mapping nullptr, // tp_hash nullptr, // tp_call nullptr, // tp_str nullptr, // tp_getattro nullptr, // tp_setattro nullptr, // tp_as_buffer Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, // tp_flags nullptr, // tp_doc reinterpret_cast(RecordIterTraverse), // tp_traverse reinterpret_cast(RecordIterClear), // tp_clear nullptr, // tp_richcompare 0, // tp_weaklistoffset PyObject_SelfIter, // tp_iter reinterpret_cast(RecordIterNext), // tp_iternext nullptr, // tp_methods nullptr, // tp_members nullptr, // tp_getset nullptr, // tp_base nullptr, // tp_dict nullptr, // tp_descr_get nullptr, // 
tp_descr_set 0, // tp_dictoffset nullptr, // tp_init nullptr, // tp_alloc nullptr, // tp_new nullptr, // tp_free nullptr, // tp_is_gc nullptr, // tp_bases nullptr, // tp_mro nullptr, // tp_cache nullptr, // tp_subclasses nullptr, // tp_weaklist nullptr, // tp_del 0, // tp_version_tag nullptr, // tp_finalize }; const char* const kModuleName = "riegeli.records.record_reader"; const char kModuleDoc[] = R"doc(Reads records from a Riegeli/records file.)doc"; const PyMethodDef kModuleMethods[] = { {"get_record_type", reinterpret_cast(GetRecordType), METH_VARARGS | METH_KEYWORDS, R"doc( get_record_type(metadata: RecordsMetadata) -> type[Message] | None Interprets record_type_name and file_descriptor from metadata. Args: metadata: Riegeli/records file metadata, typically returned by RecordReader.read_metadata(). Returns: A generated message type corresponding to the type of records, or None if that information is not available in metadata. )doc"}, {nullptr, nullptr, 0, nullptr}, }; PyModuleDef kModuleDef = { PyModuleDef_HEAD_INIT, kModuleName, // m_name kModuleDoc, // m_doc -1, // m_size const_cast(kModuleMethods), // m_methods nullptr, // m_slots nullptr, // m_traverse nullptr, // m_clear nullptr, // m_free }; PyObject* InitModule() { if (ABSL_PREDICT_FALSE(PyType_Ready(&PyRecordReader_Type) < 0)) { return nullptr; } if (ABSL_PREDICT_FALSE(PyType_Ready(&PyRecordIter_Type) < 0)) { return nullptr; } PythonPtr module(PyModule_Create(&kModuleDef)); if (ABSL_PREDICT_FALSE(module == nullptr)) return nullptr; PythonPtr existence_only = IntToPython(Field::kExistenceOnly); if (ABSL_PREDICT_FALSE(existence_only == nullptr)) return nullptr; if (ABSL_PREDICT_FALSE(PyModule_AddObject(module.get(), "EXISTENCE_ONLY", existence_only.release()) < 0)) { return nullptr; } Py_INCREF(&PyRecordReader_Type); if (ABSL_PREDICT_FALSE(PyModule_AddObject(module.get(), "RecordReader", reinterpret_cast( &PyRecordReader_Type)) < 0)) { return nullptr; } return module.release(); } } // namespace 
PyMODINIT_FUNC PyInit_record_reader() { return InitModule(); } } // namespace riegeli::python ================================================ FILE: python/riegeli/records/record_writer.cc ================================================ // Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // From https://docs.python.org/3/c-api/intro.html: // Since Python may define some pre-processor definitions which affect the // standard headers on some systems, you must include Python.h before any // standard headers are included. #define PY_SSIZE_T_CLEAN #include // clang-format: do not reorder the above include. 
#include #include #include #include "absl/base/optimization.h" #include "absl/status/status.h" #include "absl/strings/string_view.h" #include "python/riegeli/base/utils.h" #include "python/riegeli/bytes/python_writer.h" #include "python/riegeli/records/record_position.h" #include "riegeli/base/assert.h" #include "riegeli/base/chain.h" #include "riegeli/base/types.h" #include "riegeli/records/record_writer.h" namespace riegeli::python { namespace { constexpr ImportedCapsule kRecordPositionApi( kRecordPositionCapsuleName); PyObject* PyFlushType_Type; PythonPtr DefineFlushType() { static constexpr ImportedConstant kEnum("enum", "Enum"); if (ABSL_PREDICT_FALSE(!kEnum.Verify())) return nullptr; static constexpr Identifier id_FlushType("FlushType"); const PythonPtr values(Py_BuildValue( "((si)(si)(si))", "FROM_OBJECT", static_cast(FlushType::kFromObject), "FROM_PROCESS", static_cast(FlushType::kFromProcess), "FROM_MACHINE", static_cast(FlushType::kFromMachine))); if (ABSL_PREDICT_FALSE(values == nullptr)) return nullptr; return PythonPtr(PyObject_CallFunctionObjArgs(kEnum.get(), id_FlushType.get(), values.get(), nullptr)); } bool FlushTypeFromPython(PyObject* object, FlushType* value) { RIEGELI_ASSERT_NE(PyFlushType_Type, nullptr) << "Python FlushType not defined yet"; if (ABSL_PREDICT_FALSE(!PyObject_IsInstance(object, PyFlushType_Type))) { PyErr_Format(PyExc_TypeError, "Expected FlushType, not %s", Py_TYPE(object)->tp_name); return false; } static constexpr Identifier id_value("value"); const PythonPtr enum_value(PyObject_GetAttr(object, id_value.get())); if (ABSL_PREDICT_FALSE(enum_value == nullptr)) return false; const long long_value = PyLong_AsLong(enum_value.get()); if (ABSL_PREDICT_FALSE(long_value == -1) && PyErr_Occurred()) return false; *value = static_cast(long_value); return true; } class FileDescriptorCollector { public: bool Init(PyObject* file_descriptors) { file_descriptors_ = file_descriptors; files_seen_.reset(PySet_New(nullptr)); return files_seen_ != 
nullptr; } bool AddFile(PyObject* file_descriptor) { // name = file_descriptor.name static constexpr Identifier id_name("name"); const PythonPtr name(PyObject_GetAttr(file_descriptor, id_name.get())); if (ABSL_PREDICT_FALSE(name == nullptr)) return false; // if name in self.files_seen: return const int contains = PySet_Contains(files_seen_.get(), name.get()); if (ABSL_PREDICT_FALSE(contains < 0)) return false; if (contains != 0) return true; // self.files_seen.add(name) if (ABSL_PREDICT_FALSE(PySet_Add(files_seen_.get(), name.get()) < 0)) { return false; } // for dependency in file_descriptor.dependencies: // self.add_file(dependency) static constexpr Identifier id_dependencies("dependencies"); const PythonPtr dependencies( PyObject_GetAttr(file_descriptor, id_dependencies.get())); if (ABSL_PREDICT_FALSE(dependencies == nullptr)) return false; const PythonPtr iter(PyObject_GetIter(dependencies.get())); if (ABSL_PREDICT_FALSE(iter == nullptr)) return false; while (const PythonPtr dependency{PyIter_Next(iter.get())}) { if (ABSL_PREDICT_FALSE(!AddFile(dependency.get()))) return false; } if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return false; // file_descriptor_proto = self.file_descriptors.add() static constexpr Identifier id_add("add"); const PythonPtr file_descriptor_proto( PyObject_CallMethodObjArgs(file_descriptors_, id_add.get(), nullptr)); if (ABSL_PREDICT_FALSE(file_descriptor_proto == nullptr)) return false; // file_descriptor.CopyToProto(file_descriptor_proto) static constexpr Identifier id_CopyToProto("CopyToProto"); return PythonPtr(PyObject_CallMethodObjArgs( file_descriptor, id_CopyToProto.get(), file_descriptor_proto.get(), nullptr)) != nullptr; } private: PyObject* file_descriptors_; PythonPtr files_seen_; }; // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. 
extern "C" { static PyObject* SetRecordType(PyObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"metadata", "message_type", nullptr}; PyObject* metadata_arg; PyObject* message_type_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "OO:set_record_type", const_cast(keywords), &metadata_arg, &message_type_arg))) { return nullptr; } // message_descriptor = message_type.DESCRIPTOR static constexpr Identifier id_DESCRIPTOR("DESCRIPTOR"); const PythonPtr message_descriptor( PyObject_GetAttr(message_type_arg, id_DESCRIPTOR.get())); if (ABSL_PREDICT_FALSE(message_descriptor == nullptr)) return nullptr; // metadata.record_type_name = message_descriptor.full_name static constexpr Identifier id_full_name("full_name"); const PythonPtr full_name( PyObject_GetAttr(message_descriptor.get(), id_full_name.get())); if (ABSL_PREDICT_FALSE(full_name == nullptr)) return nullptr; static constexpr Identifier id_record_type_name("record_type_name"); if (ABSL_PREDICT_FALSE(PyObject_SetAttr(metadata_arg, id_record_type_name.get(), full_name.get()) < 0)) { return nullptr; } // file_descriptors = metadata.file_descriptor static constexpr Identifier id_file_descriptor("file_descriptor"); const PythonPtr file_descriptors( PyObject_GetAttr(metadata_arg, id_file_descriptor.get())); if (ABSL_PREDICT_FALSE(file_descriptors == nullptr)) return nullptr; // del file_descriptors[:] const PythonPtr slice(PySlice_New(nullptr, nullptr, nullptr)); if (ABSL_PREDICT_FALSE(slice == nullptr)) return nullptr; if (ABSL_PREDICT_FALSE(PyObject_DelItem(file_descriptors.get(), slice.get()) < 0)) { return nullptr; } // file_descriptor = message_descriptor.file static constexpr Identifier id_file("file"); const PythonPtr file_descriptor( PyObject_GetAttr(message_descriptor.get(), id_file.get())); if (ABSL_PREDICT_FALSE(file_descriptor == nullptr)) return nullptr; // FileDescriptorCollector(file_descriptors).add_file(file_descriptor) 
FileDescriptorCollector collector; if (ABSL_PREDICT_FALSE(!collector.Init(file_descriptors.get()))) { return nullptr; } if (ABSL_PREDICT_FALSE(!collector.AddFile(file_descriptor.get()))) { return nullptr; } Py_RETURN_NONE; } } // extern "C" struct PyRecordWriterObject { // clang-format off PyObject_HEAD static_assert(true, ""); // clang-format workaround. // clang-format on PythonWrapped> record_writer; }; extern PyTypeObject PyRecordWriter_Type; void SetExceptionFromRecordWriter(PyRecordWriterObject* self) { RIEGELI_ASSERT(!self->record_writer->ok()) << "Failed precondition of SetExceptionFromRecordWriter(): " "RecordWriter OK"; if (!self->record_writer->dest().exception().ok()) { self->record_writer->dest().exception().Restore(); return; } SetRiegeliError(self->record_writer->status()); } // `extern "C"` sets the C calling convention for compatibility with the Python // API. `static` avoids making symbols public, as `extern "C"` trumps anonymous // namespace. extern "C" { static void RecordWriterDestructor(PyRecordWriterObject* self) { PyObject_GC_UnTrack(self); Py_TRASHCAN_BEGIN(self, RecordWriterDestructor); PythonUnlocked([&] { self->record_writer.reset(); }); Py_TYPE(self)->tp_free(self); Py_TRASHCAN_END; } static int RecordWriterTraverse(PyRecordWriterObject* self, visitproc visit, void* arg) { if (self->record_writer.has_value()) { return self->record_writer->dest().Traverse(visit, arg); } return 0; } static int RecordWriterClear(PyRecordWriterObject* self) { PythonUnlocked([&] { self->record_writer.reset(); }); return 0; } static int RecordWriterInit(PyRecordWriterObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"dest", "owns_dest", "assumed_pos", "min_buffer_size", "max_buffer_size", "buffer_size", "options", "metadata", "serialized_metadata", nullptr}; PyObject* dest_arg; PyObject* owns_dest_arg = nullptr; PyObject* assumed_pos_arg = nullptr; PyObject* min_buffer_size_arg = nullptr; PyObject* 
// Continuation of RecordWriterInit (tp_init), then the `dest` getter and
// __repr__. Behavior visible in this span:
//  * owns_dest defaults to true; an explicit argument is coerced with
//    PyObject_IsTrue;
//  * buffer_size, if given and not None, is a shortcut that overrides both
//    min_buffer_size and max_buffer_size;
//  * `options` is parsed by RecordWriterBase::Options::FromString, mapping a
//    bad status to RiegeliError;
//  * `metadata` is serialized via its SerializeToString() method; passing
//    both `metadata` and `serialized_metadata` raises TypeError;
//  * construction of the RecordWriter happens with the GIL released
//    (PythonUnlocked); on failure the PythonWriter is closed and the captured
//    exception/status is raised, returning -1 per the tp_init protocol.
// NOTE(review): `PyArg_ParseTupleAndKeywords(..., "O|$OOOOOOOO:RecordWriter",
// const_cast(keywords), ...)` lost the cast's target type (likely
// `char**`-family) to extraction, and the repr format string in
// `StringToPython("")` lost its contents — confirm against upstream.
max_buffer_size_arg = nullptr; PyObject* buffer_size_arg = nullptr; PyObject* options_arg = nullptr; PyObject* metadata_arg = nullptr; PyObject* serialized_metadata_arg = nullptr; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O|$OOOOOOOO:RecordWriter", const_cast(keywords), &dest_arg, &owns_dest_arg, &assumed_pos_arg, &min_buffer_size_arg, &max_buffer_size_arg, &buffer_size_arg, &options_arg, &metadata_arg, &serialized_metadata_arg))) { return -1; } PythonWriter::Options python_writer_options; python_writer_options.set_owns_dest(true); if (owns_dest_arg != nullptr) { const int owns_dest_is_true = PyObject_IsTrue(owns_dest_arg); if (ABSL_PREDICT_FALSE(owns_dest_is_true < 0)) return -1; python_writer_options.set_owns_dest(owns_dest_is_true != 0); } if (assumed_pos_arg != nullptr && assumed_pos_arg != Py_None) { const std::optional assumed_pos = PositionFromPython(assumed_pos_arg); if (ABSL_PREDICT_FALSE(assumed_pos == std::nullopt)) return -1; python_writer_options.set_assumed_pos(*assumed_pos); } if (buffer_size_arg != nullptr && buffer_size_arg != Py_None) { min_buffer_size_arg = buffer_size_arg; max_buffer_size_arg = buffer_size_arg; } if (min_buffer_size_arg != nullptr) { const std::optional min_buffer_size = SizeFromPython(min_buffer_size_arg); if (ABSL_PREDICT_FALSE(min_buffer_size == std::nullopt)) return -1; python_writer_options.set_min_buffer_size(*min_buffer_size); } if (max_buffer_size_arg != nullptr) { const std::optional max_buffer_size = SizeFromPython(max_buffer_size_arg); if (ABSL_PREDICT_FALSE(max_buffer_size == std::nullopt)) return -1; python_writer_options.set_max_buffer_size(*max_buffer_size); } RecordWriterBase::Options record_writer_options; if (options_arg != nullptr) { StrOrBytes options; if (ABSL_PREDICT_FALSE(!options.FromPython(options_arg))) return -1; if (const absl::Status status = record_writer_options.FromString(options); ABSL_PREDICT_FALSE(!status.ok())) { SetRiegeliError(status); return -1; } } if 
(metadata_arg != nullptr && metadata_arg != Py_None) { static constexpr Identifier id_SerializeToString("SerializeToString"); const PythonPtr serialized_metadata_str(PyObject_CallMethodObjArgs( metadata_arg, id_SerializeToString.get(), nullptr)); if (ABSL_PREDICT_FALSE(serialized_metadata_str == nullptr)) return -1; std::optional serialized_metadata = ChainFromPython(serialized_metadata_str.get()); if (ABSL_PREDICT_FALSE(serialized_metadata == std::nullopt)) return -1; record_writer_options.set_serialized_metadata( *std::move(serialized_metadata)); } if (serialized_metadata_arg != nullptr && serialized_metadata_arg != Py_None) { std::optional serialized_metadata = ChainFromPython(serialized_metadata_arg); if (ABSL_PREDICT_FALSE(serialized_metadata == std::nullopt)) return -1; if (record_writer_options.serialized_metadata() != std::nullopt) { PyErr_SetString(PyExc_TypeError, "RecordWriter() got conflicting keyword arguments " "'metadata' and 'serialized_metadata'"); return -1; } record_writer_options.set_serialized_metadata( *std::move(serialized_metadata)); } PythonWriter python_writer(dest_arg, std::move(python_writer_options)); PythonUnlocked([&] { self->record_writer.emplace(std::move(python_writer), std::move(record_writer_options)); }); if (ABSL_PREDICT_FALSE(!self->record_writer->ok())) { self->record_writer->dest().Close(); SetExceptionFromRecordWriter(self); return -1; } return 0; } static PyObject* RecordWriterDest(PyRecordWriterObject* self, void* closure) { PyObject* const dest = ABSL_PREDICT_FALSE(!self->record_writer.has_value()) ? Py_None : self->record_writer->dest().dest(); Py_INCREF(dest); return dest; } static PyObject* RecordWriterRepr(PyRecordWriterObject* self) { const PythonPtr format = StringToPython(""); if (ABSL_PREDICT_FALSE(format == nullptr)) return nullptr; // return format.format(self.dest) PyObject* const dest = ABSL_PREDICT_FALSE(!self->record_writer.has_value()) ? 
// The method implementations backing the Python-visible API:
//  * __enter__ returns self; __exit__ closes the writer, but suppresses a
//    close() failure when an exception is already in flight (exc_type is not
//    None) so the original exception is not masked; it returns False so
//    in-flight exceptions propagate;
//  * close() closes idempotently, raising via SetExceptionFromRecordWriter on
//    failure;
//  * write_record / write_message / write_records / write_messages all funnel
//    into RecordWriter::WriteRecord, each releasing the GIL around the write
//    (PythonUnlocked) and re-acquiring it before touching Python state;
//    *_message variants first call the record's SerializeToString();
//    *_records/*_messages iterate a Python iterable and check
//    PyErr_Occurred() after the loop to distinguish iterator exhaustion from
//    iterator error;
//  * flush(flush_type=FROM_PROCESS) begins here; Verify() guards against use
//    after close/clear.
Py_None : self->record_writer->dest().dest(); static constexpr Identifier id_format("format"); return PyObject_CallMethodObjArgs(format.get(), id_format.get(), dest, nullptr); } static PyObject* RecordWriterEnter(PyObject* self, PyObject* args) { // return self Py_INCREF(self); return self; } static PyObject* RecordWriterExit(PyRecordWriterObject* self, PyObject* args) { PyObject* exc_type; PyObject* exc_value; PyObject* traceback; if (ABSL_PREDICT_FALSE(!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &traceback))) { return nullptr; } // self.close(), suppressing exceptions if exc_type != None. if (ABSL_PREDICT_TRUE(self->record_writer.has_value())) { const bool close_ok = PythonUnlocked([&] { return self->record_writer->Close(); }); if (ABSL_PREDICT_FALSE(!close_ok) && exc_type == Py_None) { SetExceptionFromRecordWriter(self); return nullptr; } } Py_RETURN_FALSE; } static PyObject* RecordWriterClose(PyRecordWriterObject* self, PyObject* args) { if (ABSL_PREDICT_TRUE(self->record_writer.has_value())) { const bool close_ok = PythonUnlocked([&] { return self->record_writer->Close(); }); if (ABSL_PREDICT_FALSE(!close_ok)) { SetExceptionFromRecordWriter(self); return nullptr; } } Py_RETURN_NONE; } static PyObject* RecordWriterWriteRecord(PyRecordWriterObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"record", nullptr}; PyObject* record_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:write_record", const_cast(keywords), &record_arg))) { return nullptr; } BytesLike record; if (ABSL_PREDICT_FALSE(!record.FromPython(record_arg))) return nullptr; if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; const bool write_record_ok = PythonUnlocked([&] { return self->record_writer->WriteRecord(record); }); if (ABSL_PREDICT_FALSE(!write_record_ok)) { SetExceptionFromRecordWriter(self); return nullptr; } Py_RETURN_NONE; } static PyObject* 
RecordWriterWriteMessage(PyRecordWriterObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"record", nullptr}; PyObject* record_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:write_message", const_cast(keywords), &record_arg))) { return nullptr; } // self.write_record(record.SerializeToString()) static constexpr Identifier id_SerializeToString("SerializeToString"); const PythonPtr serialized_object(PyObject_CallMethodObjArgs( record_arg, id_SerializeToString.get(), nullptr)); if (ABSL_PREDICT_FALSE(serialized_object == nullptr)) return nullptr; BytesLike serialized; if (ABSL_PREDICT_FALSE(!serialized.FromPython(serialized_object.get()))) { return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; const bool write_record_ok = PythonUnlocked( [&] { return self->record_writer->WriteRecord(serialized); }); if (ABSL_PREDICT_FALSE(!write_record_ok)) { SetExceptionFromRecordWriter(self); return nullptr; } Py_RETURN_NONE; } static PyObject* RecordWriterWriteRecords(PyRecordWriterObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"records", nullptr}; PyObject* records_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:write_records", const_cast(keywords), &records_arg))) { return nullptr; } // for record in records: // self.write_record(record) const PythonPtr iter(PyObject_GetIter(records_arg)); if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr; while (const PythonPtr record_object{PyIter_Next(iter.get())}) { BytesLike record; if (ABSL_PREDICT_FALSE(!record.FromPython(record_object.get()))) { return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; const bool write_record_ok = PythonUnlocked( [&] { return self->record_writer->WriteRecord(record); }); if (ABSL_PREDICT_FALSE(!write_record_ok)) { SetExceptionFromRecordWriter(self); return nullptr; } } if 
(ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return nullptr; Py_RETURN_NONE; } static PyObject* RecordWriterWriteMessages(PyRecordWriterObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"records", nullptr}; PyObject* records_arg; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "O:write_messages", const_cast(keywords), &records_arg))) { return nullptr; } // for record in records: // self.write_record(record.SerializeToString()) const PythonPtr iter(PyObject_GetIter(records_arg)); if (ABSL_PREDICT_FALSE(iter == nullptr)) return nullptr; while (const PythonPtr record_object{PyIter_Next(iter.get())}) { static constexpr Identifier id_SerializeToString("SerializeToString"); const PythonPtr serialized_object(PyObject_CallMethodObjArgs( record_object.get(), id_SerializeToString.get(), nullptr)); if (ABSL_PREDICT_FALSE(serialized_object == nullptr)) return nullptr; BytesLike serialized; if (ABSL_PREDICT_FALSE(!serialized.FromPython(serialized_object.get()))) { return nullptr; } if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; const bool write_record_ok = PythonUnlocked( [&] { return self->record_writer->WriteRecord(serialized); }); if (ABSL_PREDICT_FALSE(!write_record_ok)) { SetExceptionFromRecordWriter(self); return nullptr; } } if (ABSL_PREDICT_FALSE(PyErr_Occurred() != nullptr)) return nullptr; Py_RETURN_NONE; } static PyObject* RecordWriterFlush(PyRecordWriterObject* self, PyObject* args, PyObject* kwargs) { static constexpr const char* keywords[] = {"flush_type", nullptr}; PyObject* flush_type_arg = nullptr; if (ABSL_PREDICT_FALSE(!PyArg_ParseTupleAndKeywords( args, kwargs, "|O:flush", const_cast(keywords), &flush_type_arg))) { return nullptr; } FlushType flush_type = FlushType::kFromProcess; if (flush_type_arg != nullptr) { if (ABSL_PREDICT_FALSE(!FlushTypeFromPython(flush_type_arg, &flush_type))) { return nullptr; } } if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return 
// Remaining accessors and the Python type tables:
//  * flush() tail: Flush runs with the GIL released, failure raises;
//  * last_pos raises FailedPrecondition ("No record was written") unless a
//    record was successfully written; last_pos/pos convert RecordPosition
//    objects through the shared kRecordPositionApi capsule;
//  * estimated_size() returns an int underestimation of the file size;
//  * RecordWriterMethods / RecordWriterGetSet: PyMethodDef/PyGetSetDef tables
//    whose R"doc(...)doc" strings are the user-visible Python docs;
//  * PyRecordWriter_Type: the static PyTypeObject (GC-enabled, subclassable),
//    with a conditional tp_vectorcall_offset/tp_print slot for pre-3.8.
// NOTE(review): the raw-string docstrings below had their internal newlines
// collapsed by extraction, and several reinterpret_cast / const_cast target
// types were stripped — do not hand-edit these tables from this copy; diff
// against the upstream file instead.
nullptr; const bool flush_ok = PythonUnlocked([&] { return self->record_writer->Flush(flush_type); }); if (ABSL_PREDICT_FALSE(!flush_ok)) { SetExceptionFromRecordWriter(self); return nullptr; } Py_RETURN_NONE; } static PyObject* RecordWriterLastPos(PyRecordWriterObject* self, void* closure) { if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr; if (ABSL_PREDICT_FALSE(!self->record_writer->last_record_is_valid())) { SetRiegeliError(absl::FailedPreconditionError("No record was written")); return nullptr; } return kRecordPositionApi ->RecordPositionToPython(self->record_writer->LastPos()) .release(); } static PyObject* RecordWriterPos(PyRecordWriterObject* self, void* closure) { if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; if (ABSL_PREDICT_FALSE(!kRecordPositionApi.Verify())) return nullptr; return kRecordPositionApi->RecordPositionToPython(self->record_writer->Pos()) .release(); } static PyObject* RecordWriterEstimatedSize(PyRecordWriterObject* self, PyObject* args) { if (ABSL_PREDICT_FALSE(!self->record_writer.Verify())) return nullptr; return PositionToPython(self->record_writer->EstimatedSize()).release(); } } // extern "C" const PyMethodDef RecordWriterMethods[] = { {"__enter__", RecordWriterEnter, METH_NOARGS, R"doc( __enter__(self) -> RecordWriter Returns self. )doc"}, {"__exit__", reinterpret_cast(RecordWriterExit), METH_VARARGS, R"doc( __exit__(self, exc_type, exc_value, traceback) -> bool Calls close(). Suppresses exceptions from close() if an exception is already in flight. Args: exc_type: None or exception in flight (type). exc_value: None or exception in flight (value). traceback: None or exception in flight (traceback). )doc"}, {"close", reinterpret_cast(RecordWriterClose), METH_NOARGS, R"doc( close(self) -> None Indicates that writing is done. Writes buffered data to the file. Marks the RecordWriter as closed, disallowing further writing. 
If the RecordWriter was failed, raises the same exception again. If the RecordWriter was not failed but already closed, does nothing. )doc"}, {"write_record", reinterpret_cast(RecordWriterWriteRecord), METH_VARARGS | METH_KEYWORDS, R"doc( write_record(self, record: bytes | bytearray | memoryview) -> None Writes the next record. Args: record: Record to write as a bytes-like object. )doc"}, {"write_message", reinterpret_cast(RecordWriterWriteMessage), METH_VARARGS | METH_KEYWORDS, R"doc( write_message(self, record: Message) -> None Writes the next record. Args: record: Record to write as a proto message. )doc"}, {"write_records", reinterpret_cast(RecordWriterWriteRecords), METH_VARARGS | METH_KEYWORDS, R"doc( write_records( self, records: Iterable[bytes | bytearray | memoryview]) -> None Writes a number of records. Args: records: Records to write as an iterable of bytes-like objects. )doc"}, {"write_messages", reinterpret_cast(RecordWriterWriteMessages), METH_VARARGS | METH_KEYWORDS, R"doc( write_messages(self, records: Iterable[Message]) -> None Writes a number of records. Args: records: Records to write as an iterable of proto messages. )doc"}, {"flush", reinterpret_cast(RecordWriterFlush), METH_VARARGS | METH_KEYWORDS, R"doc( flush(self, flush_type: FlushType = FlushType.FROM_PROCESS) -> None Finalizes any open chunk and pushes buffered data to the destination. If parallelism was used in options, waits for any background writing to complete. This makes data written so far visible, but in contrast to close(), keeps the possibility to write more data later. What exactly does it mean for data to be visible depends on the destination. This degrades compression density if used too often. Args: flush_type: The scope of objects to flush and the intended data durability (without a guarantee). * FlushType.FROM_OBJECT: Makes data written so far visible in other objects, propagating flushing through owned dependencies of the given writer. 
* FlushType.FROM_PROCESS: Makes data written so far visible outside the process, propagating flushing through dependencies of the given writer. This is the default. * FlushType.FROM_MACHINE: Makes data written so far visible outside the process and durable in case of operating system crash, propagating flushing through dependencies of the given writer. )doc"}, {"estimated_size", reinterpret_cast(RecordWriterEstimatedSize), METH_NOARGS, R"doc( estimated_size(self) -> int Returns an estimation of the file size if no more data is written, without affecting data representation (i.e. without closing the current chunk) and without blocking. This is an underestimation because pending work is not taken into account: * The currently open chunk. * If parallelism was used in options, chunks being encoded in background. The exact file size can be found by flush(FlushType.FROM_OBJECT) which closes the currently open chunk, and pos.chunk_begin (record_index == 0 after flushing) which might need to wait for some background work to complete. )doc"}, {nullptr, nullptr, 0, nullptr}, }; const PyGetSetDef RecordWriterGetSet[] = { {const_cast("dest"), reinterpret_cast(RecordWriterDest), nullptr, const_cast(R"doc( dest: BinaryIO Binary IO stream being written to. )doc"), nullptr}, {const_cast("last_pos"), reinterpret_cast(RecordWriterLastPos), nullptr, const_cast(R"doc( last_pos: RecordPosition The canonical position of the last record written. The canonical position is the largest among all equivalent positions. Seeking to any equivalent position leads to reading the same record. last_pos.numeric returns the position as an int. Precondition: a record was successfully written )doc"), nullptr}, {const_cast("pos"), reinterpret_cast(RecordWriterPos), nullptr, const_cast(R"doc( pos: RecordPosition A position of the next record (or the end of file if there is no next record). A position which is not canonical can be smaller than the equivalent canonical position. 
Seeking to any equivalent position leads to reading the same record. pos.numeric returns the position as an int. After opening the file, close(), or flush(), pos is the canonical position of the next record, and pos.record_index == 0. )doc"), nullptr}, {nullptr, nullptr, nullptr, nullptr, nullptr}}; PyTypeObject PyRecordWriter_Type = { // clang-format off PyVarObject_HEAD_INIT(&PyType_Type, 0) // clang-format on "riegeli.records.record_writer.RecordWriter", // tp_name sizeof(PyRecordWriterObject), // tp_basicsize 0, // tp_itemsize reinterpret_cast(RecordWriterDestructor), // tp_dealloc #if PY_VERSION_HEX >= 0x03080000 0, // tp_vectorcall_offset #else nullptr, // tp_print #endif nullptr, // tp_getattr nullptr, // tp_setattr nullptr, // tp_as_async reinterpret_cast(RecordWriterRepr), // tp_repr nullptr, // tp_as_number nullptr, // tp_as_sequence nullptr, // tp_as_mapping nullptr, // tp_hash nullptr, // tp_call nullptr, // tp_str nullptr, // tp_getattro nullptr, // tp_setattro nullptr, // tp_as_buffer Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, // tp_flags R"doc( RecordWriter( dest: BinaryIO, *, owns_dest: bool = True, assumed_pos: int | None = None, min_buffer_size: int = 4 << 10, max_buffer_size: int = 64 << 10, buffer_size: int | None, options: str | bytes = '', metadata: RecordsMetadata | None = None, serialized_metadata: bytes | bytearray | memoryview = b'' ) -> RecordWriter Will write to the given file. Args: dest: Binary IO stream to write to. owns_dest: If True, dest is owned, close() or __exit__() calls dest.close(), and flush(flush_type) calls dest.flush() even if flush_type is FlushType.FROM_OBJECT. assumed_pos: If None, dest must support random access. If an int, it is enough that dest supports sequential access, and this position will be assumed initially. min_buffer_size: Tunes the minimal buffer size, which determines how much data at a time is typically written to dest. 
The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. max_buffer_size: Tunes the maximal buffer size, which determines how much data at a time is typically written to dest. The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. buffer_size: If not None, a shortcut for setting min_buffer_size and max_buffer_size to the same value. options: Compression and other writing options. See below. metadata: If not None, file metadata to be written at the beginning (if metadata has any fields set). Metadata are written only when the file is written from the beginning, not when it is appended to. Record type in metadata can be conveniently set by set_record_type(). serialized_metadata: If not empty, like metadata, but metadata are passed serialized as a bytes-like object. This is faster if the caller has metadata already serialized. This conflicts with metadata. The dest argument should be a binary IO stream which supports: * close() - for close() or __exit__() if owns_dest * write(bytes) * flush() - for flush() * seek(int[, int]) - if assumed_pos is None * tell() - if assumed_pos is None Example values for dest (possibly with 'ab' instead of 'wb' for appending): * io.FileIO(filename, 'wb') * io.open(filename, 'wb') - better with buffering=0, or use io.FileIO() instead * open(filename, 'wb') - better with buffering=0, or use io.FileIO() instead * io.BytesIO() - use owns_dest=False to access dest after closing the RecordWriter * tf.io.gfile.GFile(filename, 'wb') Options are documented at https://github.com/google/riegeli/blob/master/doc/record_writer_options.md )doc", // tp_doc reinterpret_cast(RecordWriterTraverse), // tp_traverse reinterpret_cast(RecordWriterClear), // tp_clear nullptr, // tp_richcompare 0, // tp_weaklistoffset nullptr, // tp_iter nullptr, // tp_iternext const_cast(RecordWriterMethods), // tp_methods nullptr, // tp_members const_cast(RecordWriterGetSet), 
// Tail of PyRecordWriter_Type (tp_getset through tp_finalize; tp_init is
// RecordWriterInit, tp_new is PyType_GenericNew so instances are created
// uninitialized and then __init__'d), then the module machinery:
//  * kModuleMethods exposes set_record_type() at module scope;
//  * InitModule() readies the type, creates the module, registers the
//    FlushType enum (DefineFlushType) and the RecordWriter type, INCREF'ing
//    the static type object before PyModule_AddObject steals a reference;
//  * PyInit_record_writer is the CPython extension entry point.
// After the "FILE:" separator the extractor appended the unrelated
// records_metadata.proto; its leading comments are fused onto single lines
// here — see the upstream .proto for the real layout.
// tp_getset nullptr, // tp_base nullptr, // tp_dict nullptr, // tp_descr_get nullptr, // tp_descr_set 0, // tp_dictoffset reinterpret_cast(RecordWriterInit), // tp_init nullptr, // tp_alloc PyType_GenericNew, // tp_new nullptr, // tp_free nullptr, // tp_is_gc nullptr, // tp_bases nullptr, // tp_mro nullptr, // tp_cache nullptr, // tp_subclasses nullptr, // tp_weaklist nullptr, // tp_del 0, // tp_version_tag nullptr, // tp_finalize }; const char* const kModuleName = "riegeli.records.record_writer"; const char kModuleDoc[] = R"doc(Writes records to a Riegeli/records file.)doc"; const PyMethodDef kModuleMethods[] = { {"set_record_type", reinterpret_cast(SetRecordType), METH_VARARGS | METH_KEYWORDS, R"doc( set_record_type(metadata: RecordsMetadata, message_type: type[Message]) -> None Sets record_type_name and file_descriptor in metadata. Args: metadata: Riegeli/records file metadata being filled, typically will become the metadata argument of RecordWriter(). message_type: Promised type of records, typically the argument type of RecordWriter.write_message(). 
)doc"}, {nullptr, nullptr, 0, nullptr}, }; PyModuleDef kModuleDef = { PyModuleDef_HEAD_INIT, kModuleName, // m_name kModuleDoc, // m_doc -1, // m_size const_cast(kModuleMethods), // m_methods nullptr, // m_slots nullptr, // m_traverse nullptr, // m_clear nullptr, // m_free }; PyObject* InitModule() { if (ABSL_PREDICT_FALSE(PyType_Ready(&PyRecordWriter_Type) < 0)) { return nullptr; } PythonPtr module(PyModule_Create(&kModuleDef)); if (ABSL_PREDICT_FALSE(module == nullptr)) return nullptr; PyFlushType_Type = DefineFlushType().release(); if (ABSL_PREDICT_FALSE(PyFlushType_Type == nullptr)) return nullptr; if (ABSL_PREDICT_FALSE(PyModule_AddObject(module.get(), "FlushType", PyFlushType_Type) < 0)) { return nullptr; } Py_INCREF(&PyRecordWriter_Type); if (ABSL_PREDICT_FALSE(PyModule_AddObject(module.get(), "RecordWriter", reinterpret_cast( &PyRecordWriter_Type)) < 0)) { return nullptr; } return module.release(); } } // namespace PyMODINIT_FUNC PyInit_record_writer() { return InitModule(); } } // namespace riegeli::python ================================================ FILE: python/riegeli/records/records_metadata.proto ================================================ edition = "2024"; package riegeli; import "google/protobuf/descriptor.proto"; // Information about a Riegeli/records file, which may be helpful to interpret // file contents. message RecordsMetadata { // Human-readable explanation of what the file contains. string file_comment = 1; // If records are proto messages of a fixed type, the full name of their type. string record_type_name = 2; // If `record_type_name` is set, proto file descriptors which should contain // the definition of that type and their dependencies (each file comes after // all its dependencies). // // If `file_descriptor` is empty but `record_type_name` is set (not // recommended), `record_type_name` can be interpreted in the context of an // unspecified proto descriptor database. 
# Span contents (several concatenated files, newlines collapsed by the
# extractor):
#  * tail of records_metadata.proto: file_descriptor, record_writer_options,
#    num_records fields and the 1000..max extension range;
#  * skipped_region.py: SkippedRegion — an immutable-ish (__slots__) value
#    object for a half-open [begin, end) byte range skipped while reading an
#    invalid file region; __init__ validates begin <= end (raises ValueError),
#    `length` is a derived property (end - begin), and __str__/__repr__ give
#    human/debug renderings;
#  * python/riegeli/records/tests/BUILD and records_test.proto
#    (SimpleMessage{id, payload}) and the license header of records_test.py.
# NOTE(review): the collapsed single-line form below is not runnable as-is;
# consult the upstream files for real formatting.
repeated google.protobuf.FileDescriptorProto file_descriptor = 3; // Options originally used to encode the file: // https://github.com/google/riegeli/blob/master/doc/record_writer_options.md // // They are informative here, they are never necessary to decode the file. string record_writer_options = 4; // Number of records in the file, so that the reader can tune for it. // // This is informative, the actual number of records may differ. int64 num_records = 5; // Clients can define custom metadata in extensions of this message. extensions 1000 to max; } ================================================ FILE: python/riegeli/records/skipped_region.py ================================================ # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Details about a skipped region of invalid file contents.""" __all__ = ('SkippedRegion',) class SkippedRegion: """Details about a skipped region of invalid file contents. Attributes: begin: File position of the beginning of the skipped region, inclusive. end: File position of the end of the skipped region, exclusive. length: Length of the skipped region, in bytes. message: Message explaining why the region is invalid. 
""" __slots__ = ('begin', 'end', 'message') def __init__(self, begin, end, message): if begin > end: raise ValueError(f'Positions in the wrong order: {begin} > {end}') self.begin = begin self.end = end self.message = message @property def length(self): return self.end - self.begin def __str__(self): return f'[{self.begin}..{self.end}): {self.message}' def __repr__(self): return f'SkippedRegion({self.begin}, {self.end}, {self.message!r})' ================================================ FILE: python/riegeli/records/tests/BUILD ================================================ load("@com_google_protobuf//bazel:proto_library.bzl", "proto_library") load("@com_google_protobuf//bazel:py_proto_library.bzl", "py_proto_library") load("@rules_python//python:defs.bzl", "py_test") package( default_visibility = [ "//python/riegeli:__subpackages__", ], features = ["header_modules"], ) licenses(["notice"]) py_test( name = "records_test", srcs = ["records_test.py"], deps = [ ":records_test_py_pb2", "//python/riegeli", "@absl_py//absl/logging", "@absl_py//absl/testing:absltest", "@absl_py//absl/testing:parameterized", "@com_google_protobuf//:protobuf_python", ], ) proto_library( name = "records_test_proto", srcs = ["records_test.proto"], ) py_proto_library( name = "records_test_py_pb2", deps = ["records_test_proto"], ) ================================================ FILE: python/riegeli/records/tests/__init__.py ================================================ ================================================ FILE: python/riegeli/records/tests/records_test.proto ================================================ edition = "2024"; package riegeli.tests; message SimpleMessage { int32 id = 1; bytes payload = 2; } ================================================ FILE: python/riegeli/records/tests/records_test.py ================================================ # Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except 
# records_test.py fixtures and helpers (collapsed single-line form):
#  * combine_named_parameters: cartesian product of named parameter sets for
#    absl parameterized tests, joining names with '_';
#  * RandomAccess enum and FakeFile: a stub file whose every real method
#    raises NotImplementedError, used to test exception propagation;
#  * UnseekableWrapper: forwards to a wrapped file but reports
#    seekable() == False and breaks tell()/seek(), forcing sequential access;
#  * FileSpecBase + BytesIOSpec/FileIOSpec/BufferedIOSpec/BuiltinFileSpec:
#    open strategies for writing/reading, including whether the stream should
#    be closed (BytesIO must not be, or its bytes are lost) and whether
#    assumed_pos=0 is passed for explicit sequential access;
#  * sample_string/sample_message/sample_invalid_message and
#    record_writer_options(): deterministic test data and writer options;
#  * the _PARAMETERIZE_BY_* tables and the start of RecordsTest (corrupt_at
#    flips one byte at a given offset to simulate corruption).
in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import abc import contextlib from enum import Enum import io import itertools from absl import logging from absl.testing import absltest from absl.testing import parameterized from google.protobuf import message import riegeli from riegeli.records.tests import records_test_pb2 def combine_named_parameters(*testcase_sets): """Allows a parameterized test with multiple independent parameters. Example: combine_named_parameters([('BytesIO', BytesIOSpec), ('FileIO', FileIOSpec)], [('serial', 0), ('parallel', 10)]) yields the same elements as [('BytesIO_serial', BytesIOSpec, 0), ('BytesIO_parallel', BytesIOSpec, 10), ('FileIO_serial', FileIOSpec, 0), ('FileIO_parallel', FileIOSpec, 10)] """ for combination in itertools.product(*testcase_sets): key = '_'.join(name for name, _ in combination) values = [value for _, value in combination] yield tuple([key] + values) class RandomAccess(Enum): RANDOM_ACCESS = 1 SEQUENTIAL_ACCESS_DETECTED = 2 SEQUENTIAL_ACCESS_EXPLICIT = 3 class FakeFile: __slots__ = ('_random_access',) def __init__(self, random_access): self._random_access = random_access def seekable(self): return self._random_access def tell(self): if self._random_access: return 0 raise NotImplementedError('tell()') def __getattr__(self, name): raise NotImplementedError(f'{name}()') class UnseekableWrapper: __slots__ = ('_wrapped',) def __init__(self, wrapped): self._wrapped = wrapped def seekable(self): return False def tell(self, *args): raise NotImplementedError('tell()') def seek(self, *args): raise NotImplementedError('seek()') def 
__getattr__(self, name): return getattr(self._wrapped, name) class FileSpecBase(metaclass=abc.ABCMeta): __slots__ = ('_random_access', '_file') def __init__(self, create_tempfile, random_access): self._random_access = random_access self._file = None @abc.abstractmethod def _open_for_writing(self): raise NotImplementedError('_open_for_writing()') def writing_open(self): self._open_for_writing() logging.debug('Opened %r for writing', self._file) if self._random_access is RandomAccess.RANDOM_ACCESS: return self._file else: return UnseekableWrapper(self._file) @property def writing_should_close(self): return True @property def writing_assumed_pos(self): if self._random_access is RandomAccess.SEQUENTIAL_ACCESS_EXPLICIT: return 0 return None @abc.abstractmethod def _open_for_reading(self): raise NotImplementedError('_open_for_reading()') def reading_open(self): self._open_for_reading() logging.debug('Opened %r for reading', self._file) if self._random_access is RandomAccess.RANDOM_ACCESS: return self._file else: return UnseekableWrapper(self._file) @property def reading_should_close(self): return True @property def reading_assumed_pos(self): if self._random_access is RandomAccess.SEQUENTIAL_ACCESS_EXPLICIT: return 0 return None def close(self): pass class BytesIOSpec(FileSpecBase): __slots__ = () def _open_for_writing(self): self._file = io.BytesIO() @property def writing_should_close(self): # If BytesIO is closed, it loses bytes written. 
return False def _open_for_reading(self): if self._file is None: raise ValueError('file was not set') self._file.seek(0) class LocalFileSpecBase(FileSpecBase): __slots__ = ('_filename',) def __init__(self, create_tempfile, random_access): super().__init__(create_tempfile, random_access) self._filename = create_tempfile().full_path class FileIOSpec(LocalFileSpecBase): __slots__ = () def _open_for_writing(self): self._file = io.FileIO(self._filename, mode='wb') def _open_for_reading(self): self._file = io.FileIO(self._filename, mode='rb') class BufferedIOSpec(LocalFileSpecBase): __slots__ = () def _open_for_writing(self): self._file = io.open(self._filename, mode='wb') def _open_for_reading(self): self._file = io.open(self._filename, mode='rb') class BuiltinFileSpec(LocalFileSpecBase): __slots__ = () def _open_for_writing(self): self._file = open(self._filename, mode='wb') def _open_for_reading(self): self._file = open(self._filename, mode='rb') def sample_string(i, size): piece = f'{i} '.encode() result = piece * -(-size // len(piece)) # len(result) >= size return result[:size] def sample_message(i, size): return records_test_pb2.SimpleMessage(id=i, payload=sample_string(i, size)) def sample_message_id_only(i): return records_test_pb2.SimpleMessage(id=i) def sample_invalid_message(size): return b'\xff' * size # An unfinished varint. 
def record_writer_options(parallelism, transpose=False, chunk_size=35000): return ( f'{"transpose," if transpose else ""}uncompressed,' f'chunk_size:{chunk_size},parallelism:{parallelism}' ) _FILE_SPEC_VALUES = ( ('BytesIO', BytesIOSpec), ('FileIO', FileIOSpec), ('BufferedIO', BufferedIOSpec), ('BuiltinFile', BuiltinFileSpec), ) _RANDOM_ACCESS_VALUES = ( ('randomAccess', RandomAccess.RANDOM_ACCESS), ('sequentialAccessDetected', RandomAccess.SEQUENTIAL_ACCESS_DETECTED), ('sequentialAccessExplicit', RandomAccess.SEQUENTIAL_ACCESS_EXPLICIT), ) _PARALLELISM_VALUES = (('serial', 0), ('parallel', 10)) _PARAMETERIZE_BY_FILE_SPEC = parameterized.named_parameters(*_FILE_SPEC_VALUES) _PARAMETERIZE_BY_RANDOM_ACCESS = parameterized.named_parameters( *_RANDOM_ACCESS_VALUES ) _PARAMETERIZE_BY_RANDOM_ACCESS_AND_PARALLELISM = parameterized.named_parameters( combine_named_parameters(_RANDOM_ACCESS_VALUES, _PARALLELISM_VALUES) ) _PARAMETERIZE_BY_FILE_SPEC_AND_PARALLELISM = parameterized.named_parameters( combine_named_parameters(_FILE_SPEC_VALUES, _PARALLELISM_VALUES) ) _PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM = ( parameterized.named_parameters( combine_named_parameters( _FILE_SPEC_VALUES, _RANDOM_ACCESS_VALUES, _PARALLELISM_VALUES ) ) ) class RecordsTest(parameterized.TestCase): def corrupt_at(self, files, index): byte_reader = files.reading_open() contents1 = byte_reader.read(index) contents2 = byte_reader.read(1) contents2 = bytes([(contents2[0] + 1) % 256]) contents3 = byte_reader.read() if files.reading_should_close: byte_reader.close() byte_writer = files.writing_open() byte_writer.write(contents1) byte_writer.write(contents2) byte_writer.write(contents3) if files.writing_should_close: byte_writer.close() @_PARAMETERIZE_BY_RANDOM_ACCESS_AND_PARALLELISM def test_record_writer_exception_from_file(self, random_access, parallelism): byte_writer = FakeFile(random_access is RandomAccess.RANDOM_ACCESS) with self.assertRaises(NotImplementedError): with 
riegeli.RecordWriter( byte_writer, assumed_pos=( 0 if random_access is RandomAccess.SEQUENTIAL_ACCESS_EXPLICIT else None ), options=record_writer_options(parallelism), ) as writer: writer.write_record(sample_string(0, 10000)) @_PARAMETERIZE_BY_RANDOM_ACCESS def test_record_reader_exception_from_file(self, random_access): byte_reader = FakeFile(random_access is RandomAccess.RANDOM_ACCESS) with self.assertRaises(NotImplementedError): with riegeli.RecordReader( byte_reader, owns_src=False, assumed_pos=( 0 if random_access is RandomAccess.SEQUENTIAL_ACCESS_EXPLICIT else None ), ) as reader: reader.read_record() @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_write_read_record(self, file_spec, random_access, parallelism): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): pos = writer.pos writer.write_record(sample_string(i, 10000)) canonical_pos = writer.last_pos if positions: self.assertGreater(pos, positions[-1]) self.assertLessEqual(pos, canonical_pos) positions.append(canonical_pos) writer.close() end_pos = writer.pos self.assertEqual(writer.last_pos, positions[-1]) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: for i in range(23): pos = reader.pos self.assertEqual(reader.read_record(), sample_string(i, 10000)) canonical_pos = reader.last_pos self.assertEqual(canonical_pos, positions[i]) self.assertLessEqual(pos, canonical_pos) self.assertIsNone(reader.read_record()) self.assertEqual(reader.pos, end_pos) reader.close() self.assertEqual(reader.pos, end_pos) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_write_read_message(self, file_spec, random_access, parallelism): with 
contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): pos = writer.pos writer.write_message(sample_message(i, 10000)) canonical_pos = writer.last_pos if positions: self.assertGreater(pos, positions[-1]) self.assertLessEqual(pos, canonical_pos) positions.append(canonical_pos) writer.close() end_pos = writer.pos self.assertEqual(writer.last_pos, positions[-1]) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: for i in range(23): pos = reader.pos self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) canonical_pos = reader.last_pos self.assertEqual(canonical_pos, positions[i]) self.assertLessEqual(pos, canonical_pos) self.assertIsNone(reader.read_message(records_test_pb2.SimpleMessage)) self.assertEqual(reader.pos, end_pos) reader.close() self.assertEqual(reader.pos, end_pos) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_write_read_records(self, file_spec, random_access, parallelism): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: writer.write_records(sample_string(i, 10000) for i in range(23)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: self.assertEqual( list(reader.read_records()), [sample_string(i, 10000) for i in range(23)], ) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_write_read_messages(self, file_spec, random_access, parallelism): 
with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: writer.write_messages(sample_message(i, 10000) for i in range(23)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: self.assertEqual( list(reader.read_messages(records_test_pb2.SimpleMessage)), [sample_message(i, 10000) for i in range(23)], ) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_write_read_messages_with_field_projection( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism, transpose=True), ) as writer: writer.write_messages(sample_message(i, 10000) for i in range(23)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, field_projection=[[ records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name[ 'id' ].number ]], ) as reader: self.assertEqual( list(reader.read_messages(records_test_pb2.SimpleMessage)), [sample_message_id_only(i) for i in range(23)], ) @_PARAMETERIZE_BY_FILE_SPEC_AND_PARALLELISM def test_write_read_messages_with_field_projection_later( self, file_spec, parallelism ): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism, transpose=True), ) as writer: writer.write_messages(sample_message(i, 10000) for i in range(23)) with 
riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: for i in range(4): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) reader.set_field_projection([[ records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name[ 'id' ].number ]]) for i in range(4, 14): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message_id_only(i), ) reader.set_field_projection(None) for i in range(14, 23): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) self.assertIsNone(reader.read_message(records_test_pb2.SimpleMessage)) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_write_read_metadata(self, file_spec, random_access, parallelism): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: metadata_written = riegeli.RecordsMetadata() metadata_written.file_comment = 'Comment' riegeli.set_record_type(metadata_written, records_test_pb2.SimpleMessage) message_written = sample_message(7, 10) with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), metadata=metadata_written, ) as writer: writer.write_message(message_written) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: metadata_read = reader.read_metadata() self.assertEqual(metadata_read, metadata_written) record_type = riegeli.get_record_type(metadata_read) assert record_type is not None self.assertEqual( record_type.DESCRIPTOR.full_name, 'riegeli.tests.SimpleMessage' ) message_read = reader.read_message(record_type) assert message_read is not None # Serialize and deserialize because messages have descriptors of # different origins. 
self.assertEqual( records_test_pb2.SimpleMessage.FromString( message_read.SerializeToString() ), message_written, ) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_invalid_metadata_exception( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), serialized_metadata=sample_invalid_message(100), ): pass with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: with self.assertRaises(message.DecodeError): reader.read_metadata() @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_invalid_metadata_recovery( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), serialized_metadata=sample_invalid_message(100), ): pass def recovery(skipped_region): pass with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: self.assertEqual(reader.read_metadata(), riegeli.RecordsMetadata()) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_invalid_metadata_recovery_stop_iteration( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), serialized_metadata=sample_invalid_message(100), ): pass def recovery(skipped_region): 
raise StopIteration with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: self.assertIsNone(reader.read_metadata()) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_field_projection(self, file_spec, random_access, parallelism): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=f'{record_writer_options(parallelism)},transpose', ) as writer: for i in range(23): writer.write_message(sample_message(i, 10000)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, field_projection=[[ records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name[ 'id' ].number ]], ) as reader: for i in range(23): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), records_test_pb2.SimpleMessage(id=i), ) self.assertIsNone(reader.read_message(records_test_pb2.SimpleMessage)) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_field_projection_existence_only( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=f'{record_writer_options(parallelism)},transpose', ) as writer: for i in range(23): writer.write_message(sample_message(i, 10000)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, field_projection=[ [ records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name[ 'id' ].number ], [ records_test_pb2.SimpleMessage.DESCRIPTOR.fields_by_name[ 'payload' ].number, riegeli.EXISTENCE_ONLY, ], ], ) as reader: for i in 
range(23): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), records_test_pb2.SimpleMessage(id=i, payload=b''), ) self.assertIsNone(reader.read_message(records_test_pb2.SimpleMessage)) @_PARAMETERIZE_BY_FILE_SPEC_AND_PARALLELISM def test_seek(self, file_spec, parallelism): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): pos = writer.pos writer.write_record(sample_string(i, 10000)) canonical_pos = writer.last_pos if positions: self.assertGreater(pos, positions[-1]) self.assertLessEqual(pos, canonical_pos) positions.append(canonical_pos) writer.close() end_pos = writer.pos with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: reader.seek(positions[9]) self.assertGreater(reader.pos, positions[8]) self.assertLessEqual(reader.pos, positions[9]) reader.seek(positions[9]) self.assertGreater(reader.pos, positions[8]) self.assertLessEqual(reader.pos, positions[9]) reader.seek(positions[11]) self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) self.assertEqual(reader.read_record(), sample_string(11, 10000)) reader.seek(positions[9]) self.assertGreater(reader.pos, positions[8]) self.assertLessEqual(reader.pos, positions[9]) self.assertEqual(reader.read_record(), sample_string(9, 10000)) reader.seek(positions[11]) self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) self.assertEqual(reader.read_record(), sample_string(11, 10000)) reader.seek(positions[13]) self.assertGreater(reader.pos, positions[12]) self.assertLessEqual(reader.pos, positions[13]) self.assertEqual(reader.read_record(), sample_string(13, 10000)) 
reader.seek(riegeli.RecordPosition(0, 0)) self.assertLessEqual(reader.pos, positions[0]) self.assertEqual(reader.read_record(), sample_string(0, 10000)) reader.seek(end_pos) self.assertLessEqual(reader.pos, end_pos) self.assertIsNone(reader.read_record()) reader.seek(positions[11]) self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) reader.close() self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) @_PARAMETERIZE_BY_FILE_SPEC_AND_PARALLELISM def test_seek_numeric(self, file_spec, parallelism): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): pos = writer.pos writer.write_record(sample_string(i, 10000)) canonical_pos = writer.last_pos if positions: self.assertGreater(pos, positions[-1]) self.assertLessEqual(pos, canonical_pos) positions.append(canonical_pos) writer.close() end_pos = writer.pos with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: reader.seek_numeric(positions[9].numeric) self.assertGreater(reader.pos, positions[8]) self.assertLessEqual(reader.pos, positions[9]) reader.seek_numeric(positions[9].numeric) self.assertGreater(reader.pos, positions[8]) self.assertLessEqual(reader.pos, positions[9]) reader.seek_numeric(positions[11].numeric) self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) self.assertEqual(reader.read_record(), sample_string(11, 10000)) reader.seek_numeric(positions[9].numeric) self.assertGreater(reader.pos, positions[8]) self.assertLessEqual(reader.pos, positions[9]) self.assertEqual(reader.read_record(), sample_string(9, 10000)) 
reader.seek_numeric(positions[11].numeric) self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) self.assertEqual(reader.read_record(), sample_string(11, 10000)) reader.seek_numeric(positions[13].numeric) self.assertGreater(reader.pos, positions[12]) self.assertLessEqual(reader.pos, positions[13]) self.assertEqual(reader.read_record(), sample_string(13, 10000)) reader.seek_numeric(0) self.assertLessEqual(reader.pos, positions[0]) self.assertEqual(reader.read_record(), sample_string(0, 10000)) reader.seek_numeric(end_pos.numeric) self.assertLessEqual(reader.pos, end_pos) self.assertIsNone(reader.read_record()) reader.seek_numeric(positions[11].numeric) self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) reader.close() self.assertGreater(reader.pos, positions[10]) self.assertLessEqual(reader.pos, positions[11]) @_PARAMETERIZE_BY_FILE_SPEC def test_seek_back(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0), ) as writer: for i in range(23): writer.write_record(sample_string(i, 10000)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: reader.seek_numeric(reader.size()) for i in reversed(range(23)): self.assertTrue(reader.seek_back()) self.assertEqual(reader.read_record(), sample_string(i, 10000)) self.assertTrue(reader.seek_back()) self.assertFalse(reader.seek_back()) @_PARAMETERIZE_BY_FILE_SPEC def test_search(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, 
assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0), ) as writer: positions = [] for i in range(23): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) writer.close() end_pos = writer.pos with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: def test_function(search_target): def test(record_reader): msg = record_reader.read_message(records_test_pb2.SimpleMessage) return (msg.id > search_target) - (msg.id < search_target) return test self.assertEqual(reader.search(test_function(7)), 0) self.assertEqual(reader.pos, positions[7]) self.assertEqual(reader.search(test_function(0)), 0) self.assertEqual(reader.pos, positions[0]) self.assertEqual(reader.search(test_function(22)), 0) self.assertEqual(reader.pos, positions[22]) self.assertEqual(reader.search(test_function(23)), -1) self.assertEqual(reader.pos, end_pos) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_record(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0), ) as writer: positions = [] for i in range(23): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) writer.close() end_pos = writer.pos with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: def test_function(search_target): def test(record): msg = records_test_pb2.SimpleMessage.FromString(record) return (msg.id > search_target) - (msg.id < search_target) return test self.assertEqual(reader.search_for_record(test_function(7)), 0) self.assertEqual(reader.pos, positions[7]) self.assertEqual(reader.search_for_record(test_function(0)), 0) 
self.assertEqual(reader.pos, positions[0]) self.assertEqual(reader.search_for_record(test_function(22)), 0) self.assertEqual(reader.pos, positions[22]) self.assertEqual(reader.search_for_record(test_function(23)), -1) self.assertEqual(reader.pos, end_pos) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_record_stop_iteration(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0), ) as writer: for i in range(23): writer.write_message(sample_message(i, 10000)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: def test(record): raise StopIteration self.assertIsNone(reader.search_for_record(test)) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_message(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0), ) as writer: positions = [] for i in range(23): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) writer.close() end_pos = writer.pos with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: def test_function(search_target): def test(msg): return (msg.id > search_target) - (msg.id < search_target) return test self.assertEqual( reader.search_for_message( records_test_pb2.SimpleMessage, test_function(7) ), 0, ) self.assertEqual(reader.pos, positions[7]) self.assertEqual( reader.search_for_message( records_test_pb2.SimpleMessage, test_function(0) ), 0, ) self.assertEqual(reader.pos, 
positions[0]) self.assertEqual( reader.search_for_message( records_test_pb2.SimpleMessage, test_function(22) ), 0, ) self.assertEqual(reader.pos, positions[22]) self.assertEqual( reader.search_for_message( records_test_pb2.SimpleMessage, test_function(23) ), -1, ) self.assertEqual(reader.pos, end_pos) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_message_stop_iteration(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0), ) as writer: for i in range(23): writer.write_message(sample_message(i, 10000)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: def test(record): raise StopIteration self.assertIsNone( reader.search_for_message(records_test_pb2.SimpleMessage, test) ) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_invalid_message_exception(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: # Write 1 valid message, 1 invalid message, and 1 valid message, each in a # separate chunk. 
with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0, chunk_size=15000), ) as writer: writer.write_message(sample_message(0, 10000)) writer.write_record(sample_invalid_message(10000)) writer.write_message(sample_message(2, 10000)) with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: def test_function(search_target): def test(msg): return (msg.id > search_target) - (msg.id < search_target) return test with self.assertRaises(message.DecodeError): reader.search_for_message( records_test_pb2.SimpleMessage, test_function(1) ) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_invalid_message_recovery(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: # Write 1 valid message, 1 invalid message, and 1 valid message, each in a # separate chunk. 
with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0, chunk_size=15000), ) as writer: positions = [] writer.write_message(sample_message(0, 10000)) positions.append(writer.last_pos) writer.write_record(sample_invalid_message(10000)) positions.append(writer.last_pos) writer.write_message(sample_message(2, 10000)) positions.append(writer.last_pos) writer.close() def recovery(skipped_region): pass with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: def test_function(search_target): def test(msg): return (msg.id > search_target) - (msg.id < search_target) return test self.assertEqual( reader.search_for_message( records_test_pb2.SimpleMessage, test_function(1) ), 1, ) self.assertEqual(reader.pos, positions[2]) @_PARAMETERIZE_BY_FILE_SPEC def test_search_for_invalid_message_recovery_stop_iteration(self, file_spec): with contextlib.closing( file_spec( self.create_tempfile, random_access=RandomAccess.RANDOM_ACCESS ) ) as files: # Write 1 valid message, 1 invalid message, and 1 valid message, each in a # separate chunk. 
with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism=0, chunk_size=15000), ) as writer: positions = [] writer.write_message(sample_message(0, 10000)) positions.append(writer.last_pos) writer.write_record(sample_invalid_message(10000)) positions.append(writer.last_pos) writer.write_message(sample_message(2, 10000)) positions.append(writer.last_pos) writer.close() def recovery(skipped_region): raise StopIteration with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: def test_function(search_target): def test(msg): return (msg.id > search_target) - (msg.id < search_target) return test self.assertIsNone( reader.search_for_message( records_test_pb2.SimpleMessage, test_function(1) ) ) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_corruption_exception(self, file_spec, random_access, parallelism): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): writer.write_record(sample_string(i, 10000)) positions.append(writer.last_pos) # Corrupt the header of the chunk containing records [9..12). self.corrupt_at(files, positions[9].chunk_begin + 20) # Read records [0..9) successfully (all before the corrupted chunk). 
reader = riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) for i in range(9): self.assertEqual(reader.read_record(), sample_string(i, 10000)) with self.assertRaises(riegeli.RiegeliError): reader.read_record() with self.assertRaises(riegeli.RiegeliError): reader.close() @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_corruption_recovery(self, file_spec, random_access, parallelism): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): writer.write_record(sample_string(i, 10000)) positions.append(writer.last_pos) # Corrupt the header of the chunk containing records [9..12). self.corrupt_at(files, positions[9].chunk_begin + 20) # Read records [0..9) and [15..23) successfully (all except the corrupted # chunk and the next chunk which intersects the same block). 
skipped_regions = [] with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=skipped_regions.append, ) as reader: for i in range(9): self.assertEqual(reader.read_record(), sample_string(i, 10000)) for i in range(15, 23): self.assertEqual(reader.read_record(), sample_string(i, 10000)) self.assertIsNone(reader.read_record()) self.assertLen(skipped_regions, 1) skipped_region = skipped_regions[0] self.assertEqual(skipped_region.begin, positions[9].numeric) self.assertEqual(skipped_region.end, positions[15].numeric) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_corruption_recovery_stop_iteration( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): writer.write_record(sample_string(i, 10000)) positions.append(writer.last_pos) # Corrupt the header of the chunk containing records [9..12). self.corrupt_at(files, positions[9].chunk_begin + 20) # Read records [0..9) successfully (all before the corrupted chunk). 
skipped_regions = [] def recovery(skipped_region): skipped_regions.append(skipped_region) raise StopIteration with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: for i in range(9): self.assertEqual(reader.read_record(), sample_string(i, 10000)) self.assertIsNone(reader.read_record()) self.assertLen(skipped_regions, 1) skipped_region = skipped_regions[0] self.assertEqual(skipped_region.begin, positions[9].numeric) self.assertEqual(skipped_region.end, positions[15].numeric) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_corruption_recovery_exception( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(23): writer.write_record(sample_string(i, 10000)) positions.append(writer.last_pos) # Corrupt the header of the chunk containing records [9..12). 
self.corrupt_at(files, positions[9].chunk_begin + 20) # Propagate exception from the recovery function def recovery(skipped_region): raise KeyboardInterrupt with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: for i in range(9): self.assertEqual(reader.read_record(), sample_string(i, 10000)) with self.assertRaises(KeyboardInterrupt): reader.read_record() @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_invalid_message_exception( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(9): writer.write_message(sample_message(i, 10000)) for i in range(9, 10): writer.write_record(sample_invalid_message(10000)) for i in range(10, 14): writer.write_message(sample_message(i, 10000)) for i in range(14, 15): writer.write_record(sample_invalid_message(10000)) for i in range(15, 23): writer.write_message(sample_message(i, 10000)) # Read messages [0..9), [10..14), and [15, 23) successfully (all except # invalid messages), raising exceptions for invalid messages with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, ) as reader: for i in range(9): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) with self.assertRaises(message.DecodeError): reader.read_message(records_test_pb2.SimpleMessage) for i in range(10, 14): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) with self.assertRaises(message.DecodeError): reader.read_message(records_test_pb2.SimpleMessage) for i in range(15, 23): self.assertEqual( 
reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_invalid_message_recovery( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(9): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) for i in range(9, 10): writer.write_record(sample_invalid_message(10000)) positions.append(writer.last_pos) for i in range(10, 14): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) for i in range(14, 15): writer.write_record(sample_invalid_message(10000)) positions.append(writer.last_pos) for i in range(15, 23): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) # Read messages [0..9), [10..14), and [15, 23) successfully (all except # invalid messages). 
skipped_regions = [] with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=skipped_regions.append, ) as reader: for i in range(9): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) for i in range(10, 14): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) for i in range(15, 23): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) self.assertIsNone(reader.read_message(records_test_pb2.SimpleMessage)) self.assertLen(skipped_regions, 2) skipped_region = skipped_regions[0] self.assertEqual(skipped_region.begin, positions[9].numeric) self.assertEqual(skipped_region.end, positions[10].numeric) skipped_region = skipped_regions[1] self.assertEqual(skipped_region.begin, positions[14].numeric) self.assertEqual(skipped_region.end, positions[15].numeric) @_PARAMETERIZE_BY_FILE_SPEC_AND_RANDOM_ACCESS_AND_PARALLELISM def test_invalid_message_recovery_stop_iteration( self, file_spec, random_access, parallelism ): with contextlib.closing( file_spec(self.create_tempfile, random_access) ) as files: positions = [] with riegeli.RecordWriter( files.writing_open(), owns_dest=files.writing_should_close, assumed_pos=files.writing_assumed_pos, options=record_writer_options(parallelism), ) as writer: for i in range(9): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) for i in range(9, 10): writer.write_record(sample_invalid_message(10000)) positions.append(writer.last_pos) for i in range(10, 14): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) for i in range(14, 15): writer.write_record(sample_invalid_message(10000)) positions.append(writer.last_pos) for i in range(15, 23): writer.write_message(sample_message(i, 10000)) positions.append(writer.last_pos) # Read messages [0..9) successfully (all before the 
first invalid # message). skipped_regions = [] def recovery(skipped_region): skipped_regions.append(skipped_region) raise StopIteration with riegeli.RecordReader( files.reading_open(), owns_src=files.reading_should_close, assumed_pos=files.reading_assumed_pos, recovery=recovery, ) as reader: for i in range(9): self.assertEqual( reader.read_message(records_test_pb2.SimpleMessage), sample_message(i, 10000), ) self.assertIsNone(reader.read_message(records_test_pb2.SimpleMessage)) self.assertLen(skipped_regions, 1) skipped_region = skipped_regions[0] self.assertEqual(skipped_region.begin, positions[9].numeric) self.assertEqual(skipped_region.end, positions[10].numeric) if __name__ == '__main__': absltest.main() ================================================ FILE: python/riegeli/tensorflow/BUILD ================================================ load("@rules_cc//cc:defs.bzl", "cc_binary") load("@rules_python//python:defs.bzl", "py_library", "py_test") package( default_visibility = ["//visibility:public"], features = ["header_modules"], ) licenses(["notice"]) py_library( name = "riegeli_dataset_ops", srcs = ["ops/riegeli_dataset_ops.py"], data = [":ops/_riegeli_dataset_ops.so"], ) cc_binary( name = "ops/_riegeli_dataset_ops.so", srcs = [ "//riegeli/tensorflow:kernels/riegeli_dataset_ops.cc", "//riegeli/tensorflow:ops/riegeli_dataset_ops.cc", ], # tensorflow/core/lib/core/refcount.h needs NDEBUG consistency between # translation units. 
copts = ["-DNDEBUG"], linkshared = True, deps = [ "//riegeli/base:arithmetic", "//riegeli/records:record_position", "//riegeli/records:record_reader", "//riegeli/records:skipped_region", "//riegeli/tensorflow/io:file_reader", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/synchronization", "@local_config_tf//:libtensorflow_framework", "@local_config_tf//:tf_header_lib", ], ) py_test( name = "riegeli_dataset_test", srcs = ["kernel_tests/riegeli_dataset_test.py"], deps = [ ":riegeli_dataset_ops", "//python/riegeli", ], ) ================================================ FILE: python/riegeli/tensorflow/__init__.py ================================================ ================================================ FILE: python/riegeli/tensorflow/kernel_tests/__init__.py ================================================ ================================================ FILE: python/riegeli/tensorflow/kernel_tests/riegeli_dataset_test.py ================================================ # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""Tests for RiegeliDataset.""" import os import riegeli from riegeli.tensorflow.ops import riegeli_dataset_ops import tensorflow as tf from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.eager import context from tensorflow.python.framework import errors from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import test_util from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test # Adapted from tensorflow/python/data/kernel_tests/test_base.py # which has restricted visibility. class DatasetTestBase(test.TestCase): """Base class for dataset tests.""" def assertValuesEqual(self, expected, actual): """Asserts that two values are equal.""" if sparse_tensor.is_sparse(expected): self.assertAllEqual(expected.indices, actual.indices) self.assertAllEqual(expected.values, actual.values) self.assertAllEqual(expected.dense_shape, actual.dense_shape) else: self.assertAllEqual(expected, actual) def getNext(self, dataset, requires_initialization=False, shared_name=None): """Returns a callable that returns the next element of the dataset. Example use: ```python # In both graph and eager modes dataset = ... get_next = self.getNext(dataset) result = self.evaluate(get_next()) ``` Args: dataset: A dataset whose elements will be returned. requires_initialization: Indicates that when the test is executed in graph mode, it should use an initializable iterator to iterate through the dataset (e.g. when it contains stateful nodes). Defaults to False. shared_name: (Optional.) If non-empty, the returned iterator will be shared under the given name across multiple sessions that share the same devices (e.g. when using a remote server). Returns: A callable that returns the next element of `dataset`. Any `TensorArray` objects `dataset` outputs are stacked. 
""" def ta_wrapper(gn): def _wrapper(): r = gn() if isinstance(r, tensor_array_ops.TensorArray): return r.stack() else: return r return _wrapper if context.executing_eagerly(): iterator = iter(dataset) return ta_wrapper(iterator._next_internal) # pylint: disable=protected-access else: if requires_initialization: iterator = dataset_ops.make_initializable_iterator(dataset, shared_name) self.evaluate(iterator.initializer) else: iterator = dataset_ops.make_one_shot_iterator(dataset) get_next = iterator.get_next() return ta_wrapper(lambda: get_next) def _compareOutputToExpected( self, result_values, expected_values, assert_items_equal ): if assert_items_equal: self.assertItemsEqual(result_values, expected_values) return for i in range(len(result_values)): nest.assert_same_structure(result_values[i], expected_values[i]) for result_value, expected_value in zip( nest.flatten(result_values[i]), nest.flatten(expected_values[i]) ): self.assertValuesEqual(expected_value, result_value) def assertDatasetProduces( self, dataset, expected_output=None, expected_shapes=None, expected_error=None, requires_initialization=False, num_test_iterations=1, assert_items_equal=False, expected_error_iter=1, ): """Asserts that a dataset produces the expected output / error. Args: dataset: A dataset to check for the expected output / error. expected_output: A list of elements that the dataset is expected to produce. expected_shapes: A list of TensorShapes which is expected to match output_shapes of dataset. expected_error: A tuple `(type, predicate)` identifying the expected error `dataset` should raise. The `type` should match the expected exception type, while `predicate` should either be 1) a unary function that inputs the raised exception and returns a boolean indicator of success or 2) a regular expression that is expected to match the error message partially. 
requires_initialization: Indicates that when the test is executed in graph mode, it should use an initializable iterator to iterate through the dataset (e.g. when it contains stateful nodes). Defaults to False. num_test_iterations: Number of times `dataset` will be iterated. Defaults to 2. assert_items_equal: Tests expected_output has (only) the same elements regardless of order. expected_error_iter: How many times to iterate before expecting an error, if an error is expected. """ self.assertTrue( expected_error is not None or expected_output is not None, 'Exactly one of expected_output or expected error should be provided.', ) if expected_error: self.assertTrue( expected_output is None, ( 'Exactly one of expected_output or expected error should be' ' provided.' ), ) with self.assertRaisesWithPredicateMatch( expected_error[0], expected_error[1] ): get_next = self.getNext( dataset, requires_initialization=requires_initialization ) for _ in range(expected_error_iter): self.evaluate(get_next()) return if expected_shapes: self.assertEqual( expected_shapes, dataset_ops.get_legacy_output_shapes(dataset) ) self.assertGreater(num_test_iterations, 0) for _ in range(num_test_iterations): get_next = self.getNext( dataset, requires_initialization=requires_initialization ) result = [] for _ in range(len(expected_output)): result.append(self.evaluate(get_next())) self._compareOutputToExpected(result, expected_output, assert_items_equal) with self.assertRaises(errors.OutOfRangeError): self.evaluate(get_next()) with self.assertRaises(errors.OutOfRangeError): self.evaluate(get_next()) @test_util.run_all_in_graph_and_eager_modes class RiegeliDatasetTest(DatasetTestBase): def setUp(self): super().setUp() self._num_files = 2 self._num_records = 7 self.test_filenames = self._create_files() def dataset_fn(self, filenames, num_epochs=1, batch_size=None): repeat_dataset = riegeli_dataset_ops.RiegeliDataset(filenames).repeat( num_epochs ) if batch_size: return 
repeat_dataset.batch(batch_size) return repeat_dataset def _record(self, f, r): return f'Record {r} of file {f}'.encode() def _create_files(self): filenames = [] for i in range(self._num_files): filename = os.path.join(self.get_temp_dir(), f'riegeli.{i}') filenames.append(filename) # Note: if records were serialized proto messages, passing # options='transpose' to RecordWriter would make compression better. with riegeli.RecordWriter(tf.io.gfile.GFile(filename, 'wb')) as writer: for j in range(self._num_records): writer.write_record(self._record(i, j)) return filenames def test_read_one_epoch(self): # Basic test: read from file 0. dataset = self.dataset_fn(self.test_filenames[0]) self.assertDatasetProduces( dataset, expected_output=[self._record(0, i) for i in range(self._num_records)], ) # Basic test: read from file 1. dataset = self.dataset_fn(self.test_filenames[1]) self.assertDatasetProduces( dataset, expected_output=[self._record(1, i) for i in range(self._num_records)], ) # Basic test: read from both files. 
dataset = self.dataset_fn(self.test_filenames) expected_output = [] for j in range(self._num_files): expected_output.extend( [self._record(j, i) for i in range(self._num_records)] ) self.assertDatasetProduces(dataset, expected_output=expected_output) def test_read_ten_epochs(self): dataset = self.dataset_fn(self.test_filenames, num_epochs=10) expected_output = [] for j in range(self._num_files): expected_output.extend( [self._record(j, i) for i in range(self._num_records)] ) self.assertDatasetProduces(dataset, expected_output=expected_output * 10) def test_read_ten_epochs_of_batches(self): dataset = self.dataset_fn( self.test_filenames, num_epochs=10, batch_size=self._num_records ) expected_output = [] for j in range(self._num_files): expected_output.append( [self._record(j, i) for i in range(self._num_records)] ) self.assertDatasetProduces(dataset, expected_output=expected_output * 10) if __name__ == '__main__': tf.test.main() ================================================ FILE: python/riegeli/tensorflow/ops/__init__.py ================================================ ================================================ FILE: python/riegeli/tensorflow/ops/riegeli_dataset_ops.py ================================================ # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""TensorFlow dataset for Riegeli/records files.""" import tensorflow as tf from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import convert from tensorflow.python.framework import load_library gen_riegeli_dataset_ops = load_library.load_op_library( tf.compat.v1.resource_loader.get_path_to_datafile('_riegeli_dataset_ops.so') ) __all__ = ('RiegeliDataset',) _DEFAULT_MIN_BUFFER_SIZE = 4 << 10 _DEFAULT_MAX_BUFFER_SIZE = 64 << 10 class RiegeliDataset(dataset_ops.DatasetSource): """A `Dataset` comprising records from one or more Riegeli/records files.""" __slots__ = ('_filenames', '_min_buffer_size', '_max_buffer_size') def __init__( self, filenames, min_buffer_size=None, max_buffer_size=None, buffer_size=None, ): """Creates a `RiegeliDataset`. Args: filenames: A `tf.string` tensor containing one or more filenames. min_buffer_size: A `tf.int64` scalar which tunes the minimal buffer size, which determines how much data at a time is typically read from the file. The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. Default: 4K. max_buffer_size: A `tf.int64` scalar which tunes the maximal buffer size, which determines how much data at a time is typically read from the file. The actual buffer size changes between min_buffer_size and max_buffer_size depending on the access pattern. Default: 64K. buffer_size: If not None, a shortcut for setting min_buffer_size and max_buffer_size to the same value. 
""" if buffer_size is not None: min_buffer_size = buffer_size max_buffer_size = buffer_size self._filenames = tf.convert_to_tensor(filenames, name='filenames') self._min_buffer_size = convert.optional_param_to_tensor( 'min_buffer_size', min_buffer_size, argument_default=_DEFAULT_MIN_BUFFER_SIZE, ) self._max_buffer_size = convert.optional_param_to_tensor( 'max_buffer_size', max_buffer_size, argument_default=_DEFAULT_MAX_BUFFER_SIZE, ) variant_tensor = gen_riegeli_dataset_ops.riegeli_dataset( self._filenames, self._min_buffer_size, self._max_buffer_size ) super().__init__(variant_tensor) @property def element_spec(self): return tf.TensorSpec([], tf.dtypes.string) ================================================ FILE: python/setup.py ================================================ # Copyright 2019 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
"""PIP package setup for Riegeli.""" import setuptools from setuptools import dist with open('README.md', 'r') as fh: long_description = fh.read() class BinaryDistribution(dist.Distribution): """This class is needed in order to create OS specific wheels.""" def has_ext_modules(self): return True setuptools.setup( name='riegeli', version='0.0.1', description='File format for storing a sequence of records', long_description=long_description, long_description_content_type='text/markdown', url='https://github.com/google/riegeli', author='Google LLC', author_email='compression-dev@google.com', license='Apache License, Version 2.0', python_requires='>=3.5,<4', install_requires=[ 'protobuf>=3.8.0,<4', ], extras_require={ 'tensorflow': ['tensorflow>=1.15,<3'], }, packages=setuptools.find_packages(), include_package_data=True, package_data={'': ['**/*.so']}, distclass=BinaryDistribution, classifiers=[ 'Programming Language :: Python', 'Intended Audience :: Developers', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', ], ) ================================================ FILE: riegeli/.gitignore ================================================ bazel-* ================================================ FILE: riegeli/BUILD ================================================ # Riegeli, file format for storing a sequence of records. 
package(
    default_visibility = ["//visibility:public"],
    features = ["header_modules"],
)

licenses(["notice"])

exports_files(["LICENSE"])



================================================
FILE: riegeli/base/BUILD
================================================
load("@rules_cc//cc:defs.bzl", "cc_library")

package(
    default_visibility = ["//visibility:public"],
    features = ["header_modules"],
)

licenses(["notice"])

cc_library(name = "type_traits", hdrs = ["type_traits.h"], deps = ["@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/utility"])

cc_library(name = "constexpr", srcs = ["port.h"], hdrs = ["constexpr.h"], deps = ["@com_google_absl//absl/base:nullability"])

cc_library(name = "null_safe_memcpy", hdrs = ["null_safe_memcpy.h"], deps = ["@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "compare", hdrs = ["compare.h"], deps = ["@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:compare"])

cc_library(name = "iterable", hdrs = ["iterable.h"], deps = [":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "stream_utils", srcs = ["stream_utils.cc"], hdrs = ["stream_utils.h"], deps = [":types", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "debug", srcs = ["debug.cc"], hdrs = ["debug.h"], deps = [":stream_utils", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"])

cc_library(name = "assert", srcs = ["assert.cc", "port.h"], hdrs = ["assert.h"], deps = [":debug", ":stream_utils", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/log:absl_log"])

cc_library(name = "types", hdrs = ["types.h"], deps = ["@com_google_absl//absl/base:nullability"])

cc_library(name = "arithmetic", hdrs = ["arithmetic.h"], deps = [":assert", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/numeric:int128"])

cc_library(name = "buffering", hdrs = ["buffering.h"], deps = [":arithmetic", ":types", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "estimated_allocated_size", hdrs = ["estimated_allocated_size.h"], deps = [":arithmetic", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "new_aligned", hdrs = ["new_aligned.h"], deps = [":arithmetic", ":assert", ":estimated_allocated_size", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/numeric:bits"])

cc_library(name = "string_utils", srcs = ["string_utils.cc"], hdrs = ["string_utils.h"], deps = [":arithmetic", ":assert", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:resize_and_overwrite"])

cc_library(name = "cord_utils", srcs = ["cord_utils.cc"], hdrs = ["cord_utils.h"], deps = [":arithmetic", ":assert", ":buffering", ":string_utils", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:string_view"])

# unicode.cc has #define before #include to influence what the included
# files provide.
cc_library(name = "unicode", srcs = ["unicode.cc"], hdrs = ["unicode.h"], features = select({"@platforms//os:windows": ["-use_header_modules"], "//conditions:default": []}), deps = select({"@platforms//os:windows": [":arithmetic", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:resize_and_overwrite", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"], "//conditions:default": []}))

cc_library(name = "type_id", hdrs = ["type_id.h"], deps = [":compare", "@com_google_absl//absl/base:nullability"])

cc_library(name = "reset", hdrs = ["reset.h"], deps = [":assert", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "type_erased_ref", hdrs = ["type_erased_ref.h"], deps = [":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/meta:type_traits"])

cc_library(name = "initializer", srcs = ["initializer_internal.h"], hdrs = ["initializer.h", "invoker.h", "maker.h", "temporary_storage.h"], deps = [":assert", ":reset", ":type_erased_ref", ":type_traits", "@com_google_absl//absl/base", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "global", hdrs = ["global.h"], deps = ["@com_google_absl//absl/base:nullability"])

cc_library(name = "string_ref", hdrs = ["string_ref.h"], deps = [":assert", ":compare", ":initializer", ":type_traits", "@com_google_absl//absl/base:config", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "bytes_ref", hdrs = ["bytes_ref.h"], deps = [":compare", ":initializer", ":string_ref", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"])

cc_library(name = "c_string_ref", hdrs = ["c_string_ref.h"], deps = [":compare", ":initializer", ":string_ref", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "memory_estimator", srcs = ["memory_estimator.cc"], hdrs = ["memory_estimator.h"], deps = [":arithmetic", ":estimated_allocated_size", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/container:node_hash_set", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "closing_ptr", hdrs = ["closing_ptr.h"], deps = ["@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "dependency", hdrs = ["dependency.h", "dependency_base.h", "dependency_manager.h"], deps = [":assert", ":bytes_ref", ":compare", ":initializer", ":reset", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"])

cc_library(name = "moving_dependency", hdrs = ["moving_dependency.h"], deps = [":dependency", ":type_traits", "@com_google_absl//absl/base:core_headers"])

cc_library(name = "stable_dependency", hdrs = ["stable_dependency.h"], deps = [":assert", ":dependency", ":initializer", ":type_traits", "@com_google_absl//absl/base:core_headers"])

cc_library(name = "any", srcs = ["any_internal.h"], hdrs = ["any.h", "any_initializer.h"], deps = [":arithmetic", ":assert", ":closing_ptr", ":compare", ":dependency", ":initializer", ":memory_estimator", ":type_erased_ref", ":type_id", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "cord_iterator_span", srcs = ["cord_iterator_span.cc"], hdrs = ["cord_iterator_span.h"], deps = [":arithmetic", ":assert", ":dependency", ":string_utils", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:resize_and_overwrite", "@com_google_absl//absl/strings:string_view"])

# errno_mapping.cc has #define before #include to influence what the
# included files provide.
cc_library(name = "status", srcs = ["errno_mapping.cc", "status.cc"], hdrs = ["errno_mapping.h", "status.h"], features = select({"@platforms//os:windows": ["-use_header_modules"], "//conditions:default": []}), deps = ["@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:string_view"] + select({"@platforms//os:windows": [":arithmetic", ":unicode", "@com_google_absl//absl/types:span"], "//conditions:default": []}))

cc_library(name = "object", srcs = ["object.cc"], hdrs = ["object.h"], deps = [":assert", ":initializer", ":type_id", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status"])

cc_library(name = "external_data", srcs = ["external_data.cc"], hdrs = ["external_data.h"], deps = ["@com_google_absl//absl/strings:string_view"])

cc_library(name = "shared_ptr", hdrs = ["intrusive_shared_ptr.h", "ownership.h", "ref_count.h", "shared_ptr.h"], deps = [":arithmetic", ":assert", ":compare", ":external_data", ":initializer", ":new_aligned", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability"])

cc_library(name = "buffer", srcs = ["buffer.cc"], hdrs = ["buffer.h"], deps = [":arithmetic", ":assert", ":buffering", ":estimated_allocated_size", ":external_data", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "shared_buffer", srcs = ["shared_buffer.cc"], hdrs = ["shared_buffer.h"], deps = [":arithmetic", ":assert", ":buffer", ":external_data", ":initializer", ":shared_ptr", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "sized_shared_buffer", srcs = ["sized_shared_buffer.cc"], hdrs = ["sized_shared_buffer.h"], deps = [":arithmetic", ":assert", ":buffering", ":shared_buffer", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"])

cc_library(name = "chain_and_external_ref", srcs = ["chain.cc"], hdrs = ["chain_base.h", "chain_details.h", "external_ref_base.h", "external_ref_support.h"], visibility = ["//visibility:private"], deps = [":arithmetic", ":assert", ":buffering", ":bytes_ref", ":compare", ":cord_utils", ":external_data", ":initializer", ":iterable", ":memory_estimator", ":new_aligned", ":null_safe_memcpy", ":shared_ptr", ":stream_utils", ":string_utils", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/meta:type_traits", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:resize_and_overwrite", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"])

cc_library(name = "chain", hdrs = ["chain.h"], deps = [":chain_and_external_ref"])

cc_library(name = "external_ref", hdrs = ["external_ref.h"], deps = [":chain_and_external_ref"])

cc_library(name = "byte_fill", srcs = ["byte_fill.cc"], hdrs = ["byte_fill.h"], deps = [":arithmetic", ":assert", ":chain", ":compare", ":cord_utils", ":external_data", ":external_ref", ":global", ":iterable", ":shared_buffer", ":types", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/numeric:bits", "@com_google_absl//absl/strings:cord", "@com_google_absl//absl/strings:string_view", "@com_google_absl//absl/types:span"])

cc_library(name = "uninitialized_vector", hdrs = ["uninitialized_vector.h"], deps = ["@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:inlined_vector"])

cc_library(name = "compact_string", srcs = ["compact_string.cc"], hdrs = ["compact_string.h"], deps = [":arithmetic", ":assert", ":bytes_ref", ":compare", ":estimated_allocated_size", ":external_data", ":new_aligned", ":null_safe_memcpy", ":type_traits", "@com_google_absl//absl/base:config", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/hash", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "optional_compact_string", hdrs = ["optional_compact_string.h"], deps = [":assert", ":bytes_ref", ":compact_string", ":compare", ":iterable", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/strings:string_view"])

cc_library(name = "hybrid_direct_map", srcs = ["hybrid_direct_common.h", "hybrid_direct_internal.h"], hdrs = ["hybrid_direct_map.h"], deps = [":arithmetic", ":assert", ":compare", ":debug", ":initializer", ":iterable", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_map"])

cc_library(name = "hybrid_direct_set", srcs = ["hybrid_direct_common.h", "hybrid_direct_internal.h"], hdrs = ["hybrid_direct_set.h"], deps = [":arithmetic", ":assert", ":compare", ":iterable", ":type_traits", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/base:nullability", "@com_google_absl//absl/container:flat_hash_set"])

cc_library(name = "binary_search", hdrs = ["binary_search.h"], deps = [":compare", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/status", "@com_google_absl//absl/status:statusor"])

cc_library(name = "parallelism", srcs = ["parallelism.cc"], hdrs = ["parallelism.h"], visibility = ["//riegeli:__subpackages__"], deps = [":assert", ":global", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time"])

cc_library(name = "background_cleaning", srcs = ["background_cleaning.cc"], hdrs = ["background_cleaning.h"], deps = [":assert", ":global", ":parallelism", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time"])

cc_library(name = "recycling_pool", hdrs = ["recycling_pool.h"], deps = [":arithmetic", ":assert", ":background_cleaning", ":compare", ":global", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/container:node_hash_map", "@com_google_absl//absl/synchronization", "@com_google_absl//absl/time"])

cc_library(name = "options_parser", srcs = ["options_parser.cc"], hdrs = ["options_parser.h"], deps = [":assert", ":initializer", ":object", ":string_ref", "@com_google_absl//absl/base:core_headers", "@com_google_absl//absl/functional:any_invocable", "@com_google_absl//absl/status", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:string_view"])



================================================
FILE: riegeli/base/any.h
================================================
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_ANY_H_ #define RIEGELI_BASE_ANY_H_ #include #include #include #include #include #include // IWYU pragma: keep #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "absl/strings/string_view.h" #include "riegeli/base/any_initializer.h" #include "riegeli/base/any_internal.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/closing_ptr.h" #include "riegeli/base/compare.h" #include "riegeli/base/dependency.h" #include "riegeli/base/dependency_base.h" #include "riegeli/base/dependency_manager.h" #include "riegeli/base/initializer.h" #include "riegeli/base/memory_estimator.h" #include "riegeli/base/temporary_storage.h" #include "riegeli/base/type_id.h" #include "riegeli/base/type_traits.h" namespace riegeli { namespace any_internal { // Common base class of `Any` and `AnyRef`. template class AnyBase : public WithEqual> { public: // Returns a `Handle` to the `Manager`, or a default `Handle` for an empty // `AnyBase`. Handle get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return methods_and_handle_.handle; } // If `Handle` supports `operator*`, `AnyBase` can be used as a smart // pointer to the result of `operator*`, for convenience. 
template ::value, int> = 0> decltype(*std::declval()) operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { AssertNotNull("Failed precondition of AnyBase::operator*: null handle"); return *get(); } template ::value, int> = 0> Handle operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { AssertNotNull("Failed precondition of AnyBase::operator->: null handle"); return get(); } // `AnyBase` can be compared against `nullptr`. If `Handle` supports // `operator==` with `nullptr`, then delegates the comparison to `Handle`, // otherwise returns `true` for a non-empty `AnyBase`. friend bool operator==(const AnyBase& a, std::nullptr_t) { return a.EqualNullptr(); } // If `true`, the `AnyBase` owns the dependent object, i.e. closing the host // object should close the dependent object. bool IsOwning() const { return methods_and_handle_.methods->is_owning(repr_.storage); } // If `true`, `get()` stays unchanged when an `AnyBase` is moved. static constexpr bool kIsStable = inline_size == 0; // If the stored `Manager` has exactly this type or a reference to it, returns // a pointer to the `Manager`. Otherwise returns `nullptr`. template < typename Manager, std::enable_if_t::value, int> = 0> Manager* GetIf() ABSL_ATTRIBUTE_LIFETIME_BOUND; template < typename Manager, std::enable_if_t::value, int> = 0> const Manager* GetIf() const ABSL_ATTRIBUTE_LIFETIME_BOUND; // Returns the `TypeId` corresponding to the stored `Manager` type, stripping // any toplevel reference. TypeId type_id() const { return methods_and_handle_.methods->type_id; } // Returns `true` when the stored `Manager` has exactly this type or a // reference to it. // // Same as `type_id() == TypeId::For()`. // // Same as `GetIf() != nullptr` but more efficient if the type // matches. template < typename Manager, std::enable_if_t::value, int> = 0> bool Holds() const { return type_id() == TypeId::For(); } // Supports `MemoryEstimator`. 
friend void RiegeliRegisterSubobjects(const AnyBase* self, MemoryEstimator& memory_estimator) { self->methods_and_handle_.methods->register_subobjects(self->repr_.storage, memory_estimator); } protected: // The state is left uninitialized. AnyBase() noexcept {} AnyBase(AnyBase&& that) noexcept; AnyBase& operator=(AnyBase&& that) noexcept; ~AnyBase() { Destroy(); } void Reset(std::nullptr_t = nullptr); // Initializes the state. // // If `Manager` is already a compatible `Any` or `AnyRef`, possibly wrapped in // `ClosingPtrType`, or an rvalue reference to it, adopts its storage instead // of keeping an indirection. This causes `GetIf()` to see through it. void Initialize(); template < typename Manager, std::enable_if_t< std::conjunction_v>, std::negation>>, int> = 0> void Initialize(Manager&& manager); template ::value, int> = 0> void Initialize(Manager&& manager); template ::value, int> = 0> void Initialize(Manager&& manager); template < typename Manager, std::enable_if_t< std::conjunction_v>, std::negation>>, int> = 0> void Initialize(Initializer manager); template ::value, int> = 0> void Initialize(Initializer manager); template ::value, int> = 0> void Initialize(Initializer manager); void InitializeFromAnyInitializer(AnyInitializer manager); template , int> = 0> void Adopt(Manager&& manager); template , int> = 0> void Adopt(Manager&& manager); // Destroys the state, leaving it uninitialized. void Destroy(); private: // For adopting the state from an instantiation with a different `inline_size` // and `inline_align`. template friend class AnyBase; // For adopting the state from an instantiation held in an `AnyInitializer`. 
friend class AnyInitializer; using Repr = any_internal::Repr; using MethodsAndHandle = any_internal::MethodsAndHandle; using NullMethods = any_internal::NullMethods; template using MethodsFor = any_internal::MethodsFor< Handle, Manager, IsInline()>; static constexpr size_t kAvailableSize = AvailableSize(); static constexpr size_t kAvailableAlign = AvailableAlign(); template ::value, int> = 0> void AssertNotNull(absl::string_view message) const { RIEGELI_ASSERT(get() != nullptr) << message; } template ::value, int> = 0> void AssertNotNull(ABSL_ATTRIBUTE_UNUSED absl::string_view message) const {} template ::value, int> = 0> bool EqualNullptr() const { return get() == nullptr; } template ::value, int> = 0> bool EqualNullptr() const { return methods_and_handle_.methods == &NullMethods::kMethods; } MethodsAndHandle methods_and_handle_; Repr repr_; }; } // namespace any_internal // `Any` refers to an optionally owned object which is accessed as // `Handle` and stored as some `Manager` type decided when the `Any` is // initialized. // // Often `Handle` is some pointer `Base*`, and then `Manager` can be e.g. // `T*` (not owned), `T` (owned), or `std::unique_ptr` (owned), with some `T` // derived from `Base`. // // `Any` holds a `Dependency` for some `Manager` type, // erasing the `Manager` parameter from the type of the `Any`, or is empty. template class ABSL_NULLABILITY_COMPATIBLE Any : public any_internal::AnyBase { private: // Indirection through `InliningImpl` is needed for MSVC for some reason. template struct InliningImpl { using type = Any)...), UnsignedMax(inline_align, alignof(Dependency)...)>; }; public: // `Any::Inlining` enlarges inline storage of // `Any`. // // `InlineManagers` specify the size of inline storage, which allows to avoid // heap allocation if `Manager` is among `InlineManagers`, or if // `Dependency` fits there regarding size and alignment. // By default inline storage is enough for a pointer. 
template using Inlining = typename InliningImpl::type; // Creates an empty `Any`. Any() noexcept { this->Initialize(); } /*implicit*/ Any(std::nullptr_t) { this->Initialize(); } // Holds a `Dependency>`. // // If `TargetT` is already a compatible `Any` or `AnyRef`, possibly // wrapped in `ClosingPtrType`, or an rvalue reference to it, adopts its // storage instead of keeping an indirection. This causes `GetIf()` to see // through it. template >, TargetSupportsDependency>, int> = 0> /*implicit*/ Any(Manager&& manager); template >, TargetSupportsDependency>, int> = 0> Any& operator=(Manager&& manager); // Holds the `Dependency` specified when the `AnyInitializer` was constructed. // // `AnyInitializer` is accepted as a template parameter to avoid this // constructor triggering implicit conversions of other parameter types to // `AnyInitializer`, which causes template instantiation cycles. template >, int> = 0> /*implicit*/ Any(Manager manager); template >, int> = 0> Any& operator=(Manager manager); // Assignment operator which materializes `Any` from its `Initializer` // except from the `Any` itself, which is handled below. template >, NotSameRef>, int> = 0> Any& operator=(Manager&& manager) { riegeli::Reset(*this, std::forward(manager)); return *this; } Any(Any&& that) = default; Any& operator=(Any&& that) = default; // Makes `*this` equivalent to a newly constructed `Any`. This avoids // constructing a temporary `Any` and moving from it. ABSL_ATTRIBUTE_REINITIALIZES void Reset(std::nullptr_t = nullptr) { Any::AnyBase::Reset(); } private: // For `ABSL_NULLABILITY_COMPATIBLE`. using pointer = std::conditional_t, Handle, void*>; }; // Specialization of `DependencyManagerImpl>`: // a dependency with ownership determined at runtime. 
template class DependencyManagerImpl, ManagerStorage> : public DependencyBase { public: using DependencyManagerImpl::DependencyBase::DependencyBase; bool IsOwning() const { return this->manager().IsOwning(); } static constexpr bool kIsStable = DependencyManagerImpl::DependencyBase::kIsStable || Any::kIsStable; protected: DependencyManagerImpl(DependencyManagerImpl&& that) = default; DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default; ~DependencyManagerImpl() = default; Handle ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->manager().get(); } }; // Specialization of // `DependencyManagerImpl>>`: // a dependency with ownership determined at runtime. template class DependencyManagerImpl< std::unique_ptr, NullDeleter>, ManagerStorage> : public DependencyBase, NullDeleter>> { public: using DependencyManagerImpl::DependencyBase::DependencyBase; bool IsOwning() const { return this->manager() != nullptr && this->manager()->IsOwning(); } static constexpr bool kIsStable = true; protected: DependencyManagerImpl(DependencyManagerImpl&& that) = default; DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default; ~DependencyManagerImpl() = default; Handle ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->manager()->get(); } }; // `AnyRef` refers to an optionally owned object which is accessed as // `Handle` and was passed as some `Manager` type decided when the `AnyRef` was // initialized. // // Often `Handle` is some pointer `Base*`, and then `Manager` can be e.g. // `T&` (not owned), `T&&` (owned), or `std::unique_ptr` (owned), with some // `T` derived from `Base`. // // `AnyRef` holds a `Dependency` (which collapses to // `Dependency` if `Manager` is itself an lvalue reference) // for some `Manager` type, erasing the `Manager` parameter from the type of the // `AnyRef`, or is empty. 
// // `AnyRef(manager)` does not own `manager`, even if it involves // temporaries, hence it should be used only as a parameter of a function or // constructor, so that the temporaries outlive its usage. Instead of storing an // `AnyRef` in a variable or returning it from a function, consider // `riegeli::OwningMaker()`, `MakerTypeFor`, // or `Any`. // // This allows to pass an unowned dependency by lvalue reference instead of by // pointer, which allows for a more idiomatic API for passing an object which // does not need to be valid after the function returns. And this allows to pass // an owned dependency by rvalue reference instead of by value, which avoids // moving it. template class ABSL_NULLABILITY_COMPATIBLE AnyRef : public any_internal::AnyBase { public: // Creates an empty `AnyRef`. AnyRef() noexcept { this->Initialize(); } /*implicit*/ AnyRef(std::nullptr_t) { this->Initialize(); } // Holds a `Dependency&&>` when // `TargetRefT` is not a reference. // // If `TargetT` is already a compatible `Any` or `AnyRef`, possibly // wrapped in `ClosingPtrType`, points to its storage instead of keeping an // indirection. This causes `GetIf()` to see through it. template >, NotSameRef, TargetT>, std::negation>>, SupportsDependency&&>>, int> = 0> /*implicit*/ AnyRef(Manager&& manager ABSL_ATTRIBUTE_LIFETIME_BOUND, TemporaryStorage>&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}); // Holds a `DependencyRef` when `TargetRefT` is a // reference. // // If `TargetT` is an rvalue reference to an already a compatible // `Any` or `AnyRef`, possibly wrapped in `ClosingPtrType`, points to its // storage instead of keeping an indirection. This causes `GetIf()` to see // through it. // // This constructor is separate so that it does not need `storage`. template >, NotSameRef, TargetT>, std::is_reference>, TargetRefSupportsDependency>, int> = 0> /*implicit*/ AnyRef(Manager&& manager ABSL_ATTRIBUTE_LIFETIME_BOUND); // Adopts the `Dependency` from `Any` with no inline storage. 
// // This constructor is separate so that it does not need temporary storage nor // `ABSL_ATTRIBUTE_LIFETIME_BOUND`. template < typename Manager, std::enable_if_t, Any>, int> = 0> /*implicit*/ AnyRef(Manager&& manager); // Holds the `Dependency` specified when the `AnyInitializer` was constructed. // // Prefer taking parameters as `AnyRef` instead of // `AnyInitializer` if they are ultimately always converted to // `AnyRef`, because this constructor may involve heap allocation. // // `AnyInitializer` is accepted as a template parameter to avoid this // constructor triggering implicit conversions of other parameter types to // `AnyInitializer`, which causes template instantiation cycles. template >, int> = 0> /*implicit*/ AnyRef(Manager manager); AnyRef(AnyRef&& that) = default; AnyRef& operator=(AnyRef&&) = delete; private: // For `ABSL_NULLABILITY_COMPATIBLE`. using pointer = std::conditional_t, Handle, void*>; }; // Specialization of `DependencyManagerImpl>`: // a dependency with ownership determined at runtime. template class DependencyManagerImpl, ManagerStorage> : public DependencyBase { public: using DependencyManagerImpl::DependencyBase::DependencyBase; bool IsOwning() const { return this->manager().IsOwning(); } static constexpr bool kIsStable = DependencyManagerImpl::DependencyBase::kIsStable || AnyRef::kIsStable; protected: DependencyManagerImpl(DependencyManagerImpl&& that) = default; DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default; ~DependencyManagerImpl() = default; Handle ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->manager().get(); } }; // Specialization of // `DependencyManagerImpl, Deleter>>`: // a dependency with ownership determined at runtime. 
template class DependencyManagerImpl, NullDeleter>, ManagerStorage> : public DependencyBase, NullDeleter>> { public: using DependencyManagerImpl::DependencyBase::DependencyBase; bool IsOwning() const { return this->manager() != nullptr && this->manager()->IsOwning(); } static constexpr bool kIsStable = true; protected: DependencyManagerImpl(DependencyManagerImpl&& that) = default; DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default; ~DependencyManagerImpl() = default; Handle ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->manager()->get(); } }; // Type-erased object like `absl::string_view`, `std::string` or `const char*` // which stores and possibly owns a string. // // Do not mutate a stored `std::string` in-place through `GetIf()` if its length // changes. Assign a new one. using AnyString = Any::Inlining; // Implementation details follow. namespace any_internal { template inline AnyBase::AnyBase( AnyBase&& that) noexcept { that.methods_and_handle_.methods->move(that.repr_.storage, repr_.storage, &methods_and_handle_); that.methods_and_handle_.methods = &NullMethods::kMethods; that.methods_and_handle_.handle = SentinelHandle(); } template inline AnyBase& AnyBase::operator=(AnyBase&& that) noexcept { if (ABSL_PREDICT_TRUE(&that != this)) { Destroy(); that.methods_and_handle_.methods->move(that.repr_.storage, repr_.storage, &methods_and_handle_); that.methods_and_handle_.methods = &NullMethods::kMethods; that.methods_and_handle_.handle = SentinelHandle(); } return *this; } template inline void AnyBase::Initialize() { methods_and_handle_.methods = &NullMethods::kMethods; new (&methods_and_handle_.handle) Handle(any_internal::SentinelHandle()); } template template >, std::negation>>, int>> inline void AnyBase::Initialize( Manager&& manager) { Initialize(Initializer(std::forward(manager))); } template template ::value, int>> inline void AnyBase::Initialize( Manager&& manager) { using ManagerValue = std::remove_reference_t; // 
`manager.methods_and_handle_.methods->used_size <= // ManagerValue::kAvailableSize`, hence if // `ManagerValue::kAvailableSize <= kAvailableSize` then // `manager.methods_and_handle_.methods->used_size <= kAvailableSize`. // No need to check possibly at runtime. if ((ManagerValue::kAvailableSize <= kAvailableSize || manager.methods_and_handle_.methods->used_size <= kAvailableSize) && // Same for alignment. (ManagerValue::kAvailableAlign <= kAvailableAlign || manager.methods_and_handle_.methods->used_align <= kAvailableAlign)) { // Adopt `manager` by moving its representation as is. manager.methods_and_handle_.methods->move( manager.repr_.storage, repr_.storage, &methods_and_handle_); manager.methods_and_handle_.methods = &NullMethods::kMethods; manager.methods_and_handle_.handle = SentinelHandle(); return; } // Adopt `manager` by moving its representation to the heap if `Manager` is // a value, or referring to it if `Manager` is a reference. Adopt(std::forward(manager)); } template template ::value, int>> inline void AnyBase::Initialize( Manager&& manager) { if (manager == nullptr) { Initialize(); return; } // Adopt `*manager` by referring to its representation. manager->methods_and_handle_.methods->make_reference( manager->repr_.storage, repr_.storage, &methods_and_handle_); } template template >, std::negation>>, int>> inline void AnyBase::Initialize( Initializer manager) { methods_and_handle_.methods = &MethodsFor::kMethods; MethodsFor::Construct(repr_.storage, &methods_and_handle_.handle, std::move(manager)); } template template ::value, int>> inline void AnyBase::Initialize( Initializer manager) { // Materialize `Manager` to adopt its storage. Initialize(std::move(manager).Reference()); } template template ::value, int>> inline void AnyBase::Initialize( Initializer manager) { // Materialize `Manager` to adopt its storage. 
Initialize(std::move(manager).Construct()); } template inline void AnyBase::InitializeFromAnyInitializer( AnyInitializer manager) { std::move(manager).Construct(repr_.storage, &methods_and_handle_, kAvailableSize, kAvailableAlign); } template template , int>> inline void AnyBase::Adopt( Manager&& manager) { manager.methods_and_handle_.methods->move_to_heap( manager.repr_.storage, repr_.storage, &methods_and_handle_); manager.methods_and_handle_.methods = &NullMethods::kMethods; manager.methods_and_handle_.handle = SentinelHandle(); } template template , int>> inline void AnyBase::Adopt( Manager&& manager) { manager.methods_and_handle_.methods->make_reference( manager.repr_.storage, repr_.storage, &methods_and_handle_); } template inline void AnyBase::Destroy() { methods_and_handle_.methods->destroy(repr_.storage); methods_and_handle_.handle.~Handle(); } template inline void AnyBase::Reset(std::nullptr_t) { methods_and_handle_.methods->destroy(repr_.storage); methods_and_handle_.methods = &NullMethods::kMethods; methods_and_handle_.handle = SentinelHandle(); } template template ::value, int>> inline Manager* AnyBase::GetIf() ABSL_ATTRIBUTE_LIFETIME_BOUND { if (!Holds()) return nullptr; return &methods_and_handle_.methods->get_raw_manager(repr_.storage) .template Cast(); } template template ::value, int>> inline const Manager* AnyBase::GetIf() const ABSL_ATTRIBUTE_LIFETIME_BOUND { if (!Holds()) return nullptr; return &methods_and_handle_.methods->get_raw_manager(repr_.storage) .template Cast(); } } // namespace any_internal template template < typename Manager, std::enable_if_t< std::conjunction_v, TargetT>, TargetSupportsDependency>, int>> inline Any::Any(Manager&& manager) { this->template Initialize>(std::forward(manager)); } template template < typename Manager, std::enable_if_t< std::conjunction_v, TargetT>, TargetSupportsDependency>, int>> inline Any& Any::operator=(Manager&& manager) { this->Destroy(); this->template Initialize>(std::forward(manager)); return 
*this; } template template < typename Manager, std::enable_if_t>, int>> inline Any::Any(Manager manager) { this->InitializeFromAnyInitializer(std::move(manager)); } template template < typename Manager, std::enable_if_t>, int>> inline Any& Any::operator=(Manager manager) { this->Destroy(); this->InitializeFromAnyInitializer(std::move(manager)); return *this; } template template < typename Manager, std::enable_if_t, TargetT>, NotSameRef, TargetT>, std::negation>>, SupportsDependency&&>>, int>> inline AnyRef::AnyRef(Manager&& manager ABSL_ATTRIBUTE_LIFETIME_BOUND, TemporaryStorage>&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND) { this->template Initialize&&>( Initializer>(std::forward(manager)) .Reference(std::move(storage))); } template template , TargetT>, NotSameRef, TargetT>, std::is_reference>, TargetRefSupportsDependency>, int>> inline AnyRef::AnyRef(Manager&& manager ABSL_ATTRIBUTE_LIFETIME_BOUND) { this->template Initialize>( std::forward(manager)); } template template , Any>, int>> inline AnyRef::AnyRef(Manager&& manager) { this->template Initialize>(std::forward(manager)); } template template < typename Manager, std::enable_if_t>, int>> inline AnyRef::AnyRef(Manager manager) { this->InitializeFromAnyInitializer(std::move(manager)); } } // namespace riegeli #endif // RIEGELI_BASE_ANY_H_ ================================================ FILE: riegeli/base/any_initializer.h ================================================ // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_ANY_INITIALIZER_H_ #define RIEGELI_BASE_ANY_INITIALIZER_H_ #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/any_internal.h" #include "riegeli/base/dependency.h" #include "riegeli/base/initializer.h" #include "riegeli/base/type_erased_ref.h" #include "riegeli/base/type_traits.h" namespace riegeli { namespace any_internal { template class AnyBase; } // namespace any_internal // A parameter of type `AnyInitializer` allows the caller to specify an // `Any` by passing a value convertible to `Any`. // // In contrast to accepting `Any` directly, this allows to construct the // object in-place, avoiding constructing a temporary and moving from it. This // also avoids specifying `::Inlining<...>` in the interface while benefiting // from that in the implementation. // // `AnyInitializer` also allows to initialize an `Any` for a // `Handle` type which is neither default-constructible nor supporting // `RiegeliDependencySentinel()`, which makes `Any` immovable. // // `AnyInitializer` is similar to `Initializer>`, except // that it efficiently handles `Any` specializations with any inline // storage constraints. // // `AnyInitializer(manager)` does not own `manager`, even if it involves // temporaries, hence it should be used only as a parameter of a function or // constructor, so that the temporaries outlive its usage. Instead of storing // an `AnyInitializer` in a variable or returning it from a function, // consider `riegeli::OwningMaker(manager_args...)`, // `MakerTypeFor`, or `Any`. template class ABSL_NULLABILITY_COMPATIBLE AnyInitializer { public: // An `Any` will be empty. 
AnyInitializer() noexcept : construct_(ConstructMethodEmpty) {} /*implicit*/ AnyInitializer(std::nullptr_t) : construct_(ConstructMethodEmpty) {} // An `Any` will hold a `Dependency>`. // // If `TargetT` is already a compatible `Any` or `AnyRef`, possibly // wrapped in `ClosingPtrType`, or an rvalue reference to it, adopts its // storage instead of keeping an indirection. This causes `GetIf()` to see // through it. template >, TargetSupportsDependency>, int> = 0> /*implicit*/ AnyInitializer(Manager&& manager ABSL_ATTRIBUTE_LIFETIME_BOUND) : construct_(ConstructMethod), context_(std::forward(manager)) {} AnyInitializer(AnyInitializer&& that) = default; AnyInitializer& operator=(AnyInitializer&&) = delete; private: // For `Construct()`. template friend class any_internal::AnyBase; // For `ABSL_NULLABILITY_COMPATIBLE`. using pointer = std::conditional_t, Handle, void*>; using Storage = any_internal::Storage; using MethodsAndHandle = any_internal::MethodsAndHandle; using NullMethods = any_internal::NullMethods; template using MethodsFor = any_internal::MethodsFor; static void ConstructMethodEmpty(TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align); template < typename Manager, std::enable_if_t< std::conjunction_v< std::negation>>, std::negation< any_internal::IsAnyClosingPtr>>>, int> = 0> static void ConstructMethod(TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align); template >::value, int> = 0> static void ConstructMethod(TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align); template >::value, int> = 0> static void ConstructMethod(TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align); template , int> = 0> static void Adopt(Target&& target, Storage dest, MethodsAndHandle* 
dest_methods_and_handle); template , int> = 0> static void Adopt(Target&& target, Storage dest, MethodsAndHandle* dest_methods_and_handle); // Constructs `dest` with `*dest_methods_and_handle` by moving from `*this`. void Construct(Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align) && { construct_(context_, dest, dest_methods_and_handle, available_size, available_align); } void (*construct_)(TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align); TypeErasedRef context_; }; // Implementation details follow. template void AnyInitializer::ConstructMethodEmpty( ABSL_ATTRIBUTE_UNUSED TypeErasedRef context, ABSL_ATTRIBUTE_UNUSED Storage dest, MethodsAndHandle* dest_methods_and_handle, ABSL_ATTRIBUTE_UNUSED size_t available_size, ABSL_ATTRIBUTE_UNUSED size_t available_align) { dest_methods_and_handle->methods = &NullMethods::kMethods; new (&dest_methods_and_handle->handle) Handle(any_internal::SentinelHandle()); } template template >>, std::negation< any_internal::IsAnyClosingPtr>>>, int>> void AnyInitializer::ConstructMethod( TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align) { using Target = TargetT; // This is equivalent to calling `MethodsFor::Construct()` // or `MethodsFor::Construct()`. Separate allocation of // `Dependency` from its construction, so that the code for // construction can be shared between the two cases, reducing the code size. 
Dependency* dep_ptr; const any_internal::Methods* methods_ptr; if (any_internal::ReprIsInline(available_size, available_align)) { dep_ptr = reinterpret_cast*>(dest); methods_ptr = &MethodsFor::kMethods; } else { if constexpr (alignof(Dependency) > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { dep_ptr = static_cast*>(operator new( sizeof(Dependency), std::align_val_t(alignof(Dependency)))); } else { dep_ptr = static_cast*>(operator new( sizeof(Dependency))); } new (dest) Dependency*(dep_ptr); methods_ptr = &MethodsFor::kMethods; } new (dep_ptr) Dependency(context.Cast()); dest_methods_and_handle->methods = methods_ptr; new (&dest_methods_and_handle->handle) Handle(dep_ptr->get()); } template template < typename Manager, std::enable_if_t>::value, int>> void AnyInitializer::ConstructMethod( TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, size_t available_size, size_t available_align) { using Target = TargetT; using TargetValue = std::remove_reference_t; // Materialize `Target` to adopt its storage. [&](Target&& target) { // `target.methods_and_handle_.methods->used_size <= // TargetValue::kAvailableSize`, hence if // `TargetValue::kAvailableSize == 0` then // `target.methods_and_handle_.methods->used_size <= available_size`. // No need to check possibly at runtime. if ((TargetValue::kAvailableSize == 0 || target.methods_and_handle_.methods->used_size <= available_size) && // Same for alignment. (TargetValue::kAvailableAlign == 0 || target.methods_and_handle_.methods->used_align <= available_align)) { // Adopt `target` instead of wrapping it. 
target.methods_and_handle_.methods->move(target.repr_.storage, dest, dest_methods_and_handle); target.methods_and_handle_.methods = &NullMethods::kMethods; target.methods_and_handle_.handle = any_internal::SentinelHandle(); return; } Adopt(std::forward(target), dest, dest_methods_and_handle); }(Initializer(context.Cast()).Reference()); } template template < typename Manager, std::enable_if_t< any_internal::IsAnyClosingPtr>::value, int>> void AnyInitializer::ConstructMethod( TypeErasedRef context, Storage dest, MethodsAndHandle* dest_methods_and_handle, ABSL_ATTRIBUTE_UNUSED size_t available_size, ABSL_ATTRIBUTE_UNUSED size_t available_align) { using Target = TargetT; // Materialize `Target` to adopt its storage. const Target target = Initializer(context.Cast()).Construct(); if (target == nullptr) { dest_methods_and_handle->methods = &NullMethods::kMethods; new (&dest_methods_and_handle->handle) Handle(any_internal::SentinelHandle()); return; } // Adopt `*manager` by referring to its representation. 
target->methods_and_handle_.methods->make_reference( target->repr_.storage, dest, dest_methods_and_handle); } template template , int>> inline void AnyInitializer::Adopt( Target&& target, Storage dest, MethodsAndHandle* dest_methods_and_handle) { target.methods_and_handle_.methods->move_to_heap(target.repr_.storage, dest, dest_methods_and_handle); target.methods_and_handle_.methods = &NullMethods::kMethods; target.methods_and_handle_.handle = any_internal::SentinelHandle(); } template template , int>> inline void AnyInitializer::Adopt( Target&& target, Storage dest, MethodsAndHandle* dest_methods_and_handle) { target.methods_and_handle_.methods->make_reference(target.repr_.storage, dest, dest_methods_and_handle); } } // namespace riegeli #endif // RIEGELI_BASE_ANY_INITIALIZER_H_ ================================================ FILE: riegeli/base/any_internal.h ================================================ // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_ANY_INTERNAL_H_ #define RIEGELI_BASE_ANY_INTERNAL_H_ #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/meta/type_traits.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/closing_ptr.h" #include "riegeli/base/dependency.h" #include "riegeli/base/dependency_base.h" #include "riegeli/base/initializer.h" #include "riegeli/base/memory_estimator.h" #include "riegeli/base/type_erased_ref.h" #include "riegeli/base/type_id.h" namespace riegeli { template class Any; template class AnyRef; namespace any_internal { // Variants of `Repr`: // * Empty `Any`: `Repr` is not used // * Stored inline: `storage` holds `Dependency` // * Held by pointer: `storage` holds `Dependency*` template struct Repr { // clang-format off alignas(UnsignedMax(alignof(void*), inline_align)) char storage[UnsignedMax(sizeof(void*), inline_size)]; // clang-format on }; // By convention, a parameter of type `Storage` points to // `Repr::storage`. using Storage = char[]; // A `Dependency` is stored inline in // `Repr` if it is movable and it fits there. // // If `inline_size == 0`, the dependency is also required to be stable (because // then `Any` declares itself stable) and trivially relocatable (because then // `Any` declares itself with trivial ABI and optimizes moving to a plain memory // copy of the representation). // Properties of inline storage in an `Any` instance are expressed as two // numbers: `available_size` and `available_align`, while constraints of a // movable `Dependency` instance on its storage are expressed as two numbers: // `used_size` and `used_align`, such that // `used_size <= available_size && used_align <= available_align` implies that // the movable `Dependency` can be stored inline in the `Any`. 
// // This formulation allows reevaluating the condition with different values of // `available_size` and `available_align` when considering adopting the storage // for a different `Any` instance, at either compile time or runtime. // Returns `available_size`: `sizeof` the storage, except that 0 indicates // `inline_size == 0`, which means the minimal size of any inline storage with // the given alignment, while also putting additional constraints on the // `Dependency` (stability and trivial relocatability). template constexpr size_t AvailableSize() { if (inline_size == 0) return 0; return sizeof(Repr); } // Returns `available_align`: `alignof` the storage, except that 0 means the // minimal alignment of any inline storage. template constexpr size_t AvailableAlign() { if (alignof(Repr) == alignof(Repr)) { return 0; } return alignof(Repr); } // Returns `used_size`: `sizeof` the `Dependency`, except that 0 indicates // compatibility with `inline_size == 0`, which means fitting under the minimal // size of any inline storage with the given alignment, and being stable. template constexpr size_t UsedSize() { if (sizeof(Dependency) <= sizeof(Repr)>) && Dependency::kIsStable) { return 0; } return sizeof(Dependency); } // Returns `used_align`: `alignof` the storage, except that 0 means fitting // under the minimal alignment of any inline storage. Making this a special // case allows to optimize out comparisons of a compile time `used_align` // against a runtime `available_align`. template constexpr size_t UsedAlign() { if (alignof(Dependency) <= alignof(Repr)) { return 0; } return alignof(Dependency); } template constexpr bool ReprIsInline(size_t available_size, size_t available_align) { return std::is_move_constructible_v> && UsedSize() <= available_size && UsedAlign() <= available_align; } template constexpr bool IsInline() { return ReprIsInline( AvailableSize(), AvailableAlign()); } template struct MethodsAndHandle; // Method pointers. 
template struct Methods { // Destroys `self`. void (*destroy)(Storage self); // Constructs `dest` with `*dest_handle` by moving from `src`. Destroys `src`. void (*move)(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle); // Constructs a differently represented `dest` with `*dest_methods_and_handle` // by moving from `src` to the heap and pointing `dest` to that. Destroys // `src`. Used only if `used_size > 0`. void (*move_to_heap)(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle); // Constructs a differently represented `dest` with `*dest_methods_and_handle` // by pointing `dest` to `src`. void (*make_reference)(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle); size_t used_size; size_t used_align; TypeId type_id; bool (*is_owning)(const Storage self); // Returns the `Manager&` stored in `self`, with the `Manager` type // corresponding to `type_id`. Used only if `type_id != nullptr`. // If `self` is const then `Manager` should be const, otherwise `Manager` // can be non-const. TypeErasedRef (*get_raw_manager)(const Storage self); void (*register_subobjects)(const Storage self, MemoryEstimator& memory_estimator); }; // Grouped members so that their address can be passed together. template struct MethodsAndHandle { MethodsAndHandle() noexcept {} const Methods* methods; union { Handle handle; }; }; template struct NullMethods; template struct MethodsForReference; template struct MethodsFor; // `IsAny` detects `Any` or `AnyRef` type with the given `Handle`, or an rvalue // reference to it. template struct IsAny : std::false_type {}; template struct IsAny> : std::true_type { }; template struct IsAny> : std::true_type {}; template struct IsAny : IsAny {}; // `IsAnyClosingPtr` detects `Any` or `AnyRef` type with the given `Handle`, // wrapped in `ClosingPtrType` or in an rvalue reference to it. 
template struct IsAnyClosingPtr : std::false_type {}; template struct IsAnyClosingPtr< Handle, std::unique_ptr, NullDeleter>> : std::true_type {}; template struct IsAnyClosingPtr, NullDeleter>> : std::true_type {}; template struct IsAnyClosingPtr : IsAnyClosingPtr {}; template inline Handle SentinelHandle() { return Initializer( RiegeliDependencySentinel(static_cast(nullptr))); } // Implementation details follow. template struct NullMethods { private: static void Destroy(ABSL_ATTRIBUTE_UNUSED Storage self) {} static void Move(ABSL_ATTRIBUTE_UNUSED Storage src, ABSL_ATTRIBUTE_UNUSED Storage dest, MethodsAndHandle* dest_methods_and_handle) { dest_methods_and_handle->methods = &kMethods; new (&dest_methods_and_handle->handle) Handle(SentinelHandle()); } static bool IsOwning(ABSL_ATTRIBUTE_UNUSED const Storage self) { return false; } static void RegisterSubobjects( ABSL_ATTRIBUTE_UNUSED const Storage self, ABSL_ATTRIBUTE_UNUSED MemoryEstimator& memory_estimator) {} public: static constexpr Methods kMethods = { Destroy, Move, nullptr, Move, 0, 0, nullptr, IsOwning, nullptr, RegisterSubobjects}; }; template struct MethodsForReference { private: static Dependency* dep_ptr(const Storage self) { return *std::launder( reinterpret_cast* const*>(self)); } static void Destroy(ABSL_ATTRIBUTE_UNUSED Storage self) {} static void Move(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle) { new (dest) Dependency*(dep_ptr(src)); dest_methods_and_handle->methods = &kMethods; new (&dest_methods_and_handle->handle) Handle(dep_ptr(dest)->get()); } static bool IsOwning(const Storage self) { return dep_ptr(self)->IsOwning(); } static TypeErasedRef GetRawManager(const Storage self) { return TypeErasedRef(dep_ptr(self)->manager()); } static void RegisterSubobjects( ABSL_ATTRIBUTE_UNUSED const Storage self, ABSL_ATTRIBUTE_UNUSED MemoryEstimator& memory_estimator) {} public: static constexpr Methods kMethods = { Destroy, Move, nullptr, Move, 0, 0, TypeId::For>(), IsOwning, 
GetRawManager, RegisterSubobjects}; }; template struct MethodsFor { static void Construct(Storage self, Handle* self_handle, Initializer manager) { new (self) Dependency*( new Dependency(std::move(manager))); new (self_handle) Handle(dep_ptr(self)->get()); } private: static Dependency* dep_ptr(const Storage self) { return *std::launder( reinterpret_cast* const*>(self)); } static void Destroy(Storage self) { delete dep_ptr(self); } static void Move(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle) { new (dest) Dependency*(dep_ptr(src)); dest_methods_and_handle->methods = &kMethods; new (&dest_methods_and_handle->handle) Handle(dep_ptr(dest)->get()); } static void MakeReference(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle) { new (dest) Dependency*(dep_ptr(src)); dest_methods_and_handle->methods = &MethodsForReference::kMethods; new (&dest_methods_and_handle->handle) Handle(dep_ptr(dest)->get()); } static bool IsOwning(const Storage self) { return dep_ptr(self)->IsOwning(); } static TypeErasedRef GetRawManager(const Storage self) { return TypeErasedRef(dep_ptr(self)->manager()); } static void RegisterSubobjects(const Storage self, MemoryEstimator& memory_estimator) { memory_estimator.RegisterDynamicObject(dep_ptr(self)); } public: static constexpr Methods kMethods = { Destroy, Move, nullptr, MakeReference, 0, 0, TypeId::For>(), IsOwning, GetRawManager, RegisterSubobjects}; }; template struct MethodsFor { static void Construct(Storage self, Handle* self_handle, Initializer manager) { new (self) Dependency(std::move(manager)); new (self_handle) Handle(dep(self).get()); } private: static Dependency& dep(Storage self) { return *std::launder(reinterpret_cast*>(self)); } static const Dependency& dep(const Storage self) { return *std::launder( reinterpret_cast*>(self)); } static Dependency* dep_ptr(const Storage self) { return *std::launder( reinterpret_cast* const*>(self)); } static void Destroy(Storage self) { 
dep(self).~Dependency(); } static void Move(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle) { new (dest) Dependency(std::move(dep(src))); dep(src).~Dependency(); dest_methods_and_handle->methods = &kMethods; new (&dest_methods_and_handle->handle) Handle(dep(dest).get()); } static void MoveToHeap(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle) { new (dest) Dependency*( new Dependency(std::move(dep(src)))); dep(src).~Dependency(); dest_methods_and_handle->methods = &MethodsFor::kMethods; new (&dest_methods_and_handle->handle) Handle(dep_ptr(dest)->get()); } static void MakeReference(Storage src, Storage dest, MethodsAndHandle* dest_methods_and_handle) { new (dest) Dependency*(&dep(src)); dest_methods_and_handle->methods = &MethodsForReference::kMethods; new (&dest_methods_and_handle->handle) Handle(dep_ptr(dest)->get()); } static bool IsOwning(const Storage self) { return dep(self).IsOwning(); } static TypeErasedRef GetRawManager(const Storage self) { return TypeErasedRef(dep(self).manager()); } static void RegisterSubobjects(const Storage self, MemoryEstimator& memory_estimator) { memory_estimator.RegisterSubobjects(&dep(self)); } public: static constexpr Methods kMethods = { Destroy, Move, MoveToHeap, MakeReference, UsedSize(), UsedAlign(), TypeId::For>(), IsOwning, GetRawManager, RegisterSubobjects}; }; } // namespace any_internal } // namespace riegeli #endif // RIEGELI_BASE_ANY_INTERNAL_H_ ================================================ FILE: riegeli/base/arithmetic.h ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_ARITHMETIC_H_ #define RIEGELI_BASE_ARITHMETIC_H_ #include #include #include #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "absl/numeric/bits.h" #include "absl/numeric/int128.h" #include "riegeli/base/assert.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `IsUnsignedInt::value` is `true` for unsigned integral types, including // `absl::uint128`. template struct IsUnsignedInt : std::conjunction, std::is_unsigned> {}; template <> struct IsUnsignedInt : std::true_type {}; // `IsSignedInt::value` is `true` for signed integral types, including // `absl::int128`. template struct IsSignedInt : std::conjunction, std::is_signed> { }; template <> struct IsSignedInt : std::true_type {}; // `IsInt::value` is `true` for integral types, including `absl::uint128` and // `absl::int128`. template struct IsInt : std::is_integral {}; template <> struct IsInt : std::true_type {}; template <> struct IsInt : std::true_type {}; // `MakeUnsigned::type` and `MakeUnsignedT` transform a signed integral // type to the corresponding unsigned type, including `absl::int128`, and leave // unsigned integral types unchanged, including `absl::uint128`. template struct MakeUnsigned : std::make_unsigned {}; template <> struct MakeUnsigned { using type = absl::uint128; }; template <> struct MakeUnsigned { using type = absl::uint128; }; template using MakeUnsignedT = typename MakeUnsigned::type; // `IntCast(value)` converts between integral types, asserting that `value` // fits in the target type. 
template , IsUnsignedInt>, int> = 0> constexpr A IntCast(B value) { RIEGELI_ASSERT_LE(value, std::numeric_limits::max()) << "Failed precondition of IntCast(): value out of range"; return static_cast(value); } template , IsSignedInt>, int> = 0> constexpr A IntCast(B value) { RIEGELI_ASSERT_GE(value, 0) << "Failed precondition of IntCast(): value out of range"; RIEGELI_ASSERT_LE(static_cast>(value), std::numeric_limits::max()) << "Failed precondition of IntCast(): value out of range"; return static_cast(value); } template , IsUnsignedInt>, int> = 0> constexpr A IntCast(B value) { RIEGELI_ASSERT_LE(value, MakeUnsignedT{std::numeric_limits::max()}) << "Failed precondition of IntCast(): value out of range"; return static_cast(value); } template , IsSignedInt>, int> = 0> constexpr A IntCast(B value) { RIEGELI_ASSERT_GE(value, std::numeric_limits::min()) << "Failed precondition of IntCast(): value out of range"; RIEGELI_ASSERT_LE(value, std::numeric_limits::max()) << "Failed precondition of IntCast(): value out of range"; return static_cast(value); } // `UnsignedCast(value)` converts `value` to the corresponding unsigned type, // asserting that `value` was non-negative. template , IsUnsignedInt>, int> = 0> constexpr MakeUnsignedT UnsignedCast(T value) { return IntCast>(value); } // `NegatingUnsignedCast(value)` converts `-value` to the corresponding unsigned // type, asserting that `value` was non-positive, and correctly handling // `std::numeric_limits::min()`. template ::value, int> = 0> constexpr MakeUnsignedT NegatingUnsignedCast(T value) { RIEGELI_ASSERT_LE(value, 0) << "Failed precondition of NegatingUnsignedCast(): positive value"; // Negate in the unsigned space to correctly handle // `std::numeric_limits::min()`. return static_cast>(0 - static_cast>(value)); } // `SignedMin()` returns the minimum of its arguments, which must be signed // integers, as their widest type. 
template ::value, int> = 0> constexpr A SignedMin(A a) { return a; } template , IsSignedInt>, int> = 0> constexpr std::common_type_t SignedMin(A a, B b) { return a <= b ? a : b; } template < typename A, typename B, typename... Rest, std::enable_if_t 0)>, IsSignedInt, IsSignedInt, IsSignedInt...>, int> = 0> constexpr std::common_type_t SignedMin(A a, B b, Rest... rest) { return SignedMin(SignedMin(a, b), rest...); } // `SignedMax()` returns the maximum of its arguments, which must be signed // integers, as their widest type. template ::value, int> = 0> constexpr A SignedMax(A a) { return a; } template , IsSignedInt>, int> = 0> constexpr std::common_type_t SignedMax(A a, B b) { return a >= b ? a : b; } template < typename A, typename B, typename... Rest, std::enable_if_t 0)>, IsSignedInt, IsSignedInt, IsSignedInt...>, int> = 0> constexpr std::common_type_t SignedMax(A a, B b, Rest... rest) { return SignedMax(SignedMax(a, b), rest...); } // `UnsignedMin()` returns the minimum of its arguments, which must be unsigned // integers, as their narrowest type. template ::value, int> = 0> constexpr A UnsignedMin(A a) { return a; } template , IsUnsignedInt>, int> = 0> constexpr IntersectionTypeT UnsignedMin(A a, B b) { return static_cast>(a <= b ? a : b); } template 0)>, IsUnsignedInt, IsUnsignedInt, IsUnsignedInt...>, int> = 0> constexpr IntersectionTypeT UnsignedMin(A a, B b, Rest... rest) { return UnsignedMin(UnsignedMin(a, b), rest...); } // `UnsignedMax()` returns the maximum of its arguments, which must be unsigned // integers, as their widest type. template ::value, int> = 0> constexpr A UnsignedMax(A a) { return a; } template , IsUnsignedInt>, int> = 0> constexpr std::common_type_t UnsignedMax(A a, B b) { return a >= b ? a : b; } template 0)>, IsUnsignedInt, IsUnsignedInt, IsUnsignedInt...>, int> = 0> constexpr std::common_type_t UnsignedMax(A a, B b, Rest... 
rest) { return UnsignedMax(UnsignedMax(a, b), rest...); } // `UnsignedClamp(value, min_value, max_value)` is at least `min_value`, // at most `max(max_value, min_value)`, preferably `value`. // // If `min_value <= max_value`, then it is equivalent to `std::clamp()`, // otherwise `min_value` wins. template < typename Value, typename Min, typename Max, std::enable_if_t, IsUnsignedInt, IsUnsignedInt>, int> = 0> constexpr std::common_type_t, Min> UnsignedClamp( Value value, Min min, Max max) { return UnsignedMax(UnsignedMin(value, max), min); } // `SaturatingIntCast()` converts an integer value to another integer type, or // returns the appropriate bound of the type if conversion would overflow. template , IsUnsignedInt>, int> = 0> constexpr A SaturatingIntCast(B value) { if (ABSL_PREDICT_FALSE(value > std::numeric_limits::max())) { return std::numeric_limits::max(); } return static_cast(value); } template , IsSignedInt>, int> = 0> constexpr A SaturatingIntCast(B value) { if (ABSL_PREDICT_FALSE(value < 0)) return 0; if (ABSL_PREDICT_FALSE(static_cast>(value) > std::numeric_limits::max())) { return std::numeric_limits::max(); } return static_cast(value); } template , IsUnsignedInt>, int> = 0> constexpr A SaturatingIntCast(B value) { if (ABSL_PREDICT_FALSE(value > MakeUnsignedT{std::numeric_limits::max()})) { return std::numeric_limits::max(); } return static_cast(value); } template , IsSignedInt>, int> = 0> constexpr A SaturatingIntCast(B value) { if (ABSL_PREDICT_FALSE(value < std::numeric_limits::min())) { return std::numeric_limits::min(); } if (ABSL_PREDICT_FALSE(value > std::numeric_limits::max())) { return std::numeric_limits::max(); } return static_cast(value); } // `SaturatingAdd()` adds unsigned values, or returns max possible value of the // type if addition would overflow. 
template ::value, int> = 0> constexpr T SaturatingAdd() { return 0; } template ::value, int> = 0> constexpr T SaturatingAdd(T a) { return a; } template ::value, int> = 0> constexpr T SaturatingAdd(T a, T b) { if (ABSL_PREDICT_FALSE(b > std::numeric_limits::max() - a)) { return std::numeric_limits::max(); } return a + b; } template 0)>, IsUnsignedInt, IsUnsignedInt...>, int> = 0> constexpr T SaturatingAdd(T a, T b, Rest... rest) { return SaturatingAdd(SaturatingAdd(a, b), rest...); } // `SaturatingSub()` subtracts unsigned values, or returns 0 if subtraction // would underflow. template , IsUnsignedInt>, int> = 0> constexpr T SaturatingSub(T a, U b) { if (ABSL_PREDICT_FALSE(b > a)) return 0; return a - IntCast(b); } // `RoundDown()` rounds an unsigned value downwards to the nearest multiple of // the given power of 2. template , std::integral_constant< bool, absl::has_single_bit(alignment)>>, int> = 0> constexpr T RoundDown(T value) { return value & ~T{alignment - 1}; } // `RoundUp()` rounds an unsigned value upwards to the nearest multiple of the // given power of 2. template , std::integral_constant< bool, absl::has_single_bit(alignment)>>, int> = 0> constexpr T RoundUp(T value) { return ((value - 1) | T{alignment - 1}) + 1; } // `PtrDistance(first, last)` returns `last - first` as `size_t`, asserting that // `first <= last`. 
template constexpr size_t PtrDistance(const A* absl_nullable first, const A* absl_nullable last) { RIEGELI_ASSERT_EQ(first == nullptr, last == nullptr) << "Failed precondition of PtrDistance(): " "nullptr compared with non-nullptr"; RIEGELI_ASSERT_LE(first, last) << "Failed precondition of PtrDistance(): pointers in the wrong order"; return IntCast(last - first); } } // namespace riegeli #endif // RIEGELI_BASE_ARITHMETIC_H_ ================================================ FILE: riegeli/base/assert.cc ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "riegeli/base/assert.h" #include #include "absl/base/nullability.h" #include "absl/log/absl_log.h" #include "riegeli/base/stream_utils.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli::assert_internal { CheckResult::CheckResult(const char* function, const char* prefix) : header_(new StringOStream(new std::string())) { header() << "Check failed in " << function << ": " << prefix; } CheckFailed::CheckFailed(const char* file, int line, CheckResult check_result) : file_(file), line_(line), check_result_(check_result), details_(new StringOStream(new std::string())) {} CheckFailed::~CheckFailed() { if (!details_->dest()->empty()) { check_result_.header() << "; " << *details_->dest(); } ABSL_LOG(FATAL).AtLocation(file_, line_) << *check_result_.header().dest(); } void CheckNotNullFailed(const char* file, int line, const char* function, const char* expression) { CheckResult check_result(function, expression); check_result.header() << " != nullptr"; CheckFailed check_failed(file, line, check_result); } CheckResult CheckImpossibleResult(const char* function) { return CheckResult(function, "Impossible"); } } // namespace riegeli::assert_internal ================================================ FILE: riegeli/base/assert.h ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_ASSERT_H_ #define RIEGELI_BASE_ASSERT_H_ #include #include #include // IWYU pragma: export #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "riegeli/base/debug.h" #include "riegeli/base/port.h" #include "riegeli/base/stream_utils.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `RIEGELI_DEBUG` determines whether assertions are verified or just assumed. // By default it follows `NDEBUG`. #ifndef RIEGELI_DEBUG #ifdef NDEBUG #define RIEGELI_DEBUG 0 #else #define RIEGELI_DEBUG 1 #endif #endif namespace assert_internal { #if __cpp_lib_unreachable #define RIEGELI_INTERNAL_UNREACHABLE() ::std::unreachable() #elif RIEGELI_INTERNAL_HAS_BUILTIN(__builtin_unreachable) || \ RIEGELI_INTERNAL_IS_GCC_VERSION(4, 5) #define RIEGELI_INTERNAL_UNREACHABLE() __builtin_unreachable() #elif defined(_WIN32) #define RIEGELI_INTERNAL_UNREACHABLE() __assume(false) #else #define RIEGELI_INTERNAL_UNREACHABLE() for (;;) #endif // Indicates that a check succeeded or failed. // // If it failed, stores a stream for writing the header. class CheckResult { public: // A check succeeded. CheckResult() = default; // A check failed. The header will begin with // "Check failed in function: prefix". explicit CheckResult(const char* function, const char* prefix); CheckResult(const CheckResult& that) = default; CheckResult& operator=(const CheckResult& that) = default; // Returns `true` if the check succeeded. explicit operator bool() const { return header_ == nullptr; } // Returns the header stream. // // Precondition: the check failed, i.e. `*this` is `false`. StringOStream& header() { assert(header_ != nullptr); return *header_; } private: StringOStream* absl_nullable header_ = nullptr; }; // Stores a `CheckResult` and a stream for adding details to the message. // The message is "header; details", or just "header" if details are empty. 
// In the destructor, outputs the message and terminates the program. class CheckFailed { public: explicit CheckFailed(const char* file, int line, CheckResult check_result); // Allows to add details to the message by writing to the stream. std::ostream& details() { return *details_; } // Prints the check failure message and terminates the program. ABSL_ATTRIBUTE_NORETURN ~CheckFailed(); private: const char* file_; int line_; CheckResult check_result_; StringOStream* details_; }; // Indicates that a check failed with the message header // "Check failed in function: assertion (a vs. b)". template ABSL_ATTRIBUTE_COLD CheckResult CheckOpResult(const char* function, const char* assertion, const A& a, const B& b) { CheckResult check_result(function, assertion); check_result.header() << " (" << riegeli::Debug(a) << " vs. " << riegeli::Debug(b) << ")"; return check_result; } // Indicates that a check failed with the message header // "Check failed in function: expression is OK (status)". namespace assert_internal { template struct HasStatus : std::false_type {}; template struct HasStatus().status())>> : std::true_type {}; } // namespace assert_internal template ABSL_ATTRIBUTE_COLD CheckResult CheckOkResult(const char* function, const char* expression, const StatusType& status) { if constexpr (!assert_internal::HasStatus::value) { // `absl::Status`. CheckResult check_result(function, expression); check_result.header() << " is OK (" << status << ")"; return check_result; } else { // `absl::StatusOr`. return CheckOkResult(function, expression, status.status()); } } // Writes "Check failed in function: expression != nullptr" and terminates // the program. ABSL_ATTRIBUTE_NORETURN void CheckNotNullFailed(const char* file, int line, const char* function, const char* expression); // Indicates that a check failed with the message header // "Check failed in function: Impossible". 
ABSL_ATTRIBUTE_COLD CheckResult CheckImpossibleResult(const char* function); // These functions allow using `a` and `b` multiple times without reevaluation. // They are small enough to be inlined, with the slow path delegated to // `CheckOpResult()`. #define RIEGELI_INTERNAL_DEFINE_CHECK_OP(name, op) \ template \ inline CheckResult Check##name(const char* function, const char* assertion, \ const A& a, const B& b) { \ if (ABSL_PREDICT_TRUE(a op b)) return CheckResult(); \ CheckResult check_result = CheckOpResult(function, assertion, a, b); \ if (check_result) RIEGELI_INTERNAL_UNREACHABLE(); \ return check_result; \ } \ static_assert(true, "") // Eat a semicolon. RIEGELI_INTERNAL_DEFINE_CHECK_OP(Eq, ==); RIEGELI_INTERNAL_DEFINE_CHECK_OP(Ne, !=); RIEGELI_INTERNAL_DEFINE_CHECK_OP(Lt, <); RIEGELI_INTERNAL_DEFINE_CHECK_OP(Gt, >); RIEGELI_INTERNAL_DEFINE_CHECK_OP(Le, <=); RIEGELI_INTERNAL_DEFINE_CHECK_OP(Ge, >=); #undef RIEGELI_INTERNAL_DEFINE_CHECK_OP template inline CheckResult CheckOk(const char* function, const char* expression, const StatusType& status) { if (ABSL_PREDICT_TRUE(status.ok())) return CheckResult(); CheckResult check_result = CheckOkResult(function, expression, status); if (check_result) RIEGELI_INTERNAL_UNREACHABLE(); return check_result; } template inline T&& CheckNotNull(const char* file, int line, const char* function, const char* expression, T&& value) { if (ABSL_PREDICT_FALSE(value == nullptr)) { CheckNotNullFailed(file, line, function, expression); } return std::forward(value); } #if !RIEGELI_DEBUG // These functions allow using `a` and `b` multiple times without reevaluation. // They are small enough to be inlined. #define RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(name, op) \ template \ inline bool EvalAssert##name(const A& a, const B& b) { \ return true || a op b; /* Check that this compiles. */ \ } \ static_assert(true, "") // Eat a semicolon. 
RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(Eq, ==); RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(Ne, !=); RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(Lt, <); RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(Gt, >); RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(Le, <=); RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP(Ge, >=); #undef RIEGELI_INTERNAL_DEFINE_EVAL_ASSERT_OP template inline T&& EvalAssertNotNull(T&& value) { ABSL_ATTRIBUTE_UNUSED const bool condition = true || value == nullptr; // Check that this compiles. return std::forward(value); } template inline bool EvalAssertOk(const StatusType& status) { return true || status.ok(); // Check that this compiles. } template inline T&& AssumeNotNull(T&& value) { if (value == nullptr) RIEGELI_INTERNAL_UNREACHABLE(); return std::forward(value); } #ifdef _MSC_VER // Silence MSVC warning for destructor that does not return. #pragma warning(push) #pragma warning(disable : 4722) #endif class UnreachableStream { public: UnreachableStream() { RIEGELI_INTERNAL_UNREACHABLE(); } ABSL_ATTRIBUTE_NORETURN ~UnreachableStream() { RIEGELI_INTERNAL_UNREACHABLE(); } template UnreachableStream& operator<<(ABSL_ATTRIBUTE_UNUSED T&& src) { return *this; } }; #ifdef _MSC_VER #pragma warning(pop) #endif #endif // !RIEGELI_DEBUG // Allow `MACRO()` expanding to `if (condition) ...; else ...`, to be usable as // `if (condition) MACRO();` without a warning about ambiguous `else`. // The definition of `MACRO()` must begin with `RIEGELI_INTERNAL_BLOCK_ELSE`. #define RIEGELI_INTERNAL_BLOCK_ELSE \ switch (0) \ case 0: \ default: } // namespace assert_internal // `RIEGELI_CHECK(expr)` checks that `expr` is `true`, terminating the program // if not. // // `RIEGELI_CHECK_{EQ,NE,LT,GT,LE,GE}(a, b)` check the relationship between `a` // and `b`, and include values of `a` and `b` in the failure message. The values // must be printable using `riegeli::Debug()`. // // `RIEGELI_CHECK_NOTNULL(expr)` checks that `expr` is not `nullptr` and returns // `expr`. 
//
// `RIEGELI_CHECK_OK(expr)` checks that `expr.ok()`, and includes either
// `expr.status()` or `expr` in the failure message. Supported types include
// `absl::Status`, `absl::StatusOr`, and classes deriving from `Object`.
//
// `RIEGELI_CHECK_NOTNULL(expr)` is an expression which evaluates to `expr`.
// The remaining `RIEGELI_CHECK*` macros can be followed by streaming `<<`
// operators in order to append more details to the failure message
// (streamed expressions are evaluated only on assertion failure).
//
// `RIEGELI_CHECK_UNREACHABLE()` checks that this point is not reached.

// Chooses the most informative function-name spelling the compiler provides,
// for use in failure messages.
#if defined(__clang__) || RIEGELI_INTERNAL_IS_GCC_VERSION(2, 6)
#define RIEGELI_INTERNAL_FUNCTION __PRETTY_FUNCTION__
#elif defined(_WIN32)
#define RIEGELI_INTERNAL_FUNCTION __FUNCSIG__
#else
#define RIEGELI_INTERNAL_FUNCTION __func__
#endif

// Common expansion of the comparison checks: the fast path is the inline
// `Check##name()`; on failure `CheckFailed(...).details()` receives any
// streamed `<<` operands.
#define RIEGELI_INTERNAL_CHECK_OP(name, op, a, b)                            \
  RIEGELI_INTERNAL_BLOCK_ELSE                                                \
  if (const ::riegeli::assert_internal::CheckResult riegeli_internal_check = \
          ::riegeli::assert_internal::Check##name(RIEGELI_INTERNAL_FUNCTION, \
                                                  #a " " #op " " #b, a, b))  \
    ;                                                                        \
  else                                                                       \
    ::riegeli::assert_internal::CheckFailed(__FILE__, __LINE__,              \
                                            riegeli_internal_check)          \
        .details()

#define RIEGELI_CHECK(expr)                                                \
  RIEGELI_INTERNAL_BLOCK_ELSE                                              \
  if (ABSL_PREDICT_TRUE(expr))                                             \
    ;                                                                      \
  else                                                                     \
    ::riegeli::assert_internal::CheckFailed(                               \
        __FILE__, __LINE__,                                                \
        ::riegeli::assert_internal::CheckResult(RIEGELI_INTERNAL_FUNCTION, \
                                                #expr))                    \
        .details()

#define RIEGELI_CHECK_EQ(a, b) RIEGELI_INTERNAL_CHECK_OP(Eq, ==, a, b)
#define RIEGELI_CHECK_NE(a, b) RIEGELI_INTERNAL_CHECK_OP(Ne, !=, a, b)
#define RIEGELI_CHECK_LT(a, b) RIEGELI_INTERNAL_CHECK_OP(Lt, <, a, b)
#define RIEGELI_CHECK_GT(a, b) RIEGELI_INTERNAL_CHECK_OP(Gt, >, a, b)
#define RIEGELI_CHECK_LE(a, b) RIEGELI_INTERNAL_CHECK_OP(Le, <=, a, b)
#define RIEGELI_CHECK_GE(a, b) RIEGELI_INTERNAL_CHECK_OP(Ge, >=, a, b)

#define RIEGELI_CHECK_OK(status)                                             \
  RIEGELI_INTERNAL_BLOCK_ELSE                                                \
  if (const ::riegeli::assert_internal::CheckResult riegeli_internal_check = \
          ::riegeli::assert_internal::CheckOk(RIEGELI_INTERNAL_FUNCTION,     \
                                              #status, status))              \
    ;                                                                        \
  else                                                                       \
    ::riegeli::assert_internal::CheckFailed(__FILE__, __LINE__,              \
                                            riegeli_internal_check)          \
        .details()

#define RIEGELI_CHECK_NOTNULL(expr)         \
  ::riegeli::assert_internal::CheckNotNull( \
      __FILE__, __LINE__, RIEGELI_INTERNAL_FUNCTION, #expr, expr)

#define RIEGELI_CHECK_UNREACHABLE()                      \
  ::riegeli::assert_internal::CheckFailed(               \
      __FILE__, __LINE__,                                \
      ::riegeli::assert_internal::CheckImpossibleResult( \
          RIEGELI_INTERNAL_FUNCTION))                    \
      .details()

// If `RIEGELI_DEBUG` is `true`, `RIEGELI_ASSERT*` macros are equivalent to the
// corresponding `RIEGELI_CHECK*` macros.
//
// If `RIEGELI_DEBUG` is `false`, they do nothing except for ensuring that the
// assertion compiles, and that any code appending to the stream compiles.
//
// There is no `RIEGELI_ASSERT_NOTNULL` because the argument is returned, and
// thus it is necessarily always evaluated also if `RIEGELI_DEBUG` is `false`
// (the semantics of `RIEGELI_ASSERT*` of doing nothing if `RIEGELI_DEBUG` is
// `false` cannot be followed). Use `RIEGELI_EVAL_ASSERT_NOTNULL` instead.
//
// There is no `RIEGELI_ASSERT_UNREACHABLE` because no following code is
// expected, and thus this point is necessarily never reached also if
// `RIEGELI_DEBUG` is `false` (the semantics of `RIEGELI_ASSERT*` of doing
// nothing if `RIEGELI_DEBUG` is `false` cannot be followed). Use
// `RIEGELI_ASSUME_UNREACHABLE` instead.
#if RIEGELI_DEBUG

// Debug builds: assertions are full checks.
#define RIEGELI_ASSERT RIEGELI_CHECK
#define RIEGELI_ASSERT_EQ RIEGELI_CHECK_EQ
#define RIEGELI_ASSERT_NE RIEGELI_CHECK_NE
#define RIEGELI_ASSERT_LT RIEGELI_CHECK_LT
#define RIEGELI_ASSERT_GT RIEGELI_CHECK_GT
#define RIEGELI_ASSERT_LE RIEGELI_CHECK_LE
#define RIEGELI_ASSERT_GE RIEGELI_CHECK_GE
#define RIEGELI_ASSERT_OK RIEGELI_CHECK_OK

#else  // !RIEGELI_DEBUG

// Non-debug builds: `true || (expr)` never evaluates `expr` at runtime but
// still requires it to compile; the dead `else` branch does the same for
// streamed `<<` operands.
#define RIEGELI_ASSERT(expr)  \
  RIEGELI_INTERNAL_BLOCK_ELSE \
  if (true || (expr))         \
    ;                         \
  else                        \
    ::riegeli::assert_internal::UnreachableStream()

#define RIEGELI_ASSERT_EQ(a, b) RIEGELI_ASSERT((a) == (b))
#define RIEGELI_ASSERT_NE(a, b) RIEGELI_ASSERT((a) != (b))
#define RIEGELI_ASSERT_LT(a, b) RIEGELI_ASSERT((a) < (b))
#define RIEGELI_ASSERT_GT(a, b) RIEGELI_ASSERT((a) > (b))
#define RIEGELI_ASSERT_LE(a, b) RIEGELI_ASSERT((a) <= (b))
#define RIEGELI_ASSERT_GE(a, b) RIEGELI_ASSERT((a) >= (b))
#define RIEGELI_ASSERT_OK(status) RIEGELI_ASSERT((status).ok())

#endif  // !RIEGELI_DEBUG

// If `RIEGELI_DEBUG` is `true`, `RIEGELI_EVAL_ASSERT*` macros are equivalent to
// the corresponding `RIEGELI_CHECK*` macros.
//
// If `RIEGELI_DEBUG` is `false`, they evaluate the arguments, but do not check
// the assertion, although they verify that evaluating the assertion and any
// code appending to the stream compiles.
//
// There is no `RIEGELI_EVAL_ASSERT_UNREACHABLE` because there is no argument
// to evaluate, and because no following code is expected, and thus this point
// is necessarily never reached also if `RIEGELI_DEBUG` is `false` (the
// semantics of `RIEGELI_EVAL_ASSERT*` of doing nothing besides evaluating the
// arguments cannot be followed). Use `RIEGELI_ASSUME_UNREACHABLE` instead.

#if RIEGELI_DEBUG

// Debug builds: evaluating assertions are full checks.
#define RIEGELI_EVAL_ASSERT RIEGELI_CHECK
#define RIEGELI_EVAL_ASSERT_EQ RIEGELI_CHECK_EQ
#define RIEGELI_EVAL_ASSERT_NE RIEGELI_CHECK_NE
#define RIEGELI_EVAL_ASSERT_LT RIEGELI_CHECK_LT
#define RIEGELI_EVAL_ASSERT_GT RIEGELI_CHECK_GT
#define RIEGELI_EVAL_ASSERT_LE RIEGELI_CHECK_LE
#define RIEGELI_EVAL_ASSERT_GE RIEGELI_CHECK_GE
#define RIEGELI_EVAL_ASSERT_OK RIEGELI_CHECK_OK
#define RIEGELI_EVAL_ASSERT_NOTNULL RIEGELI_CHECK_NOTNULL

#else  // !RIEGELI_DEBUG

// Non-debug builds: `EvalAssert##name(a, b)` always returns `true`, so `a`
// and `b` are evaluated but the relation is not checked.
#define RIEGELI_INTERNAL_EVAL_ASSERT_OP(name, a, b)       \
  RIEGELI_INTERNAL_BLOCK_ELSE                             \
  if (::riegeli::assert_internal::EvalAssert##name(a, b)) \
    ;                                                     \
  else                                                    \
    ::riegeli::assert_internal::UnreachableStream()

// `(expr) || true` evaluates `expr` but ignores its result.
#define RIEGELI_EVAL_ASSERT(expr) \
  RIEGELI_INTERNAL_BLOCK_ELSE     \
  if ((expr) || true)             \
    ;                             \
  else                            \
    ::riegeli::assert_internal::UnreachableStream()

#define RIEGELI_EVAL_ASSERT_EQ(a, b) RIEGELI_INTERNAL_EVAL_ASSERT_OP(Eq, a, b)
#define RIEGELI_EVAL_ASSERT_NE(a, b) RIEGELI_INTERNAL_EVAL_ASSERT_OP(Ne, a, b)
#define RIEGELI_EVAL_ASSERT_LT(a, b) RIEGELI_INTERNAL_EVAL_ASSERT_OP(Lt, a, b)
#define RIEGELI_EVAL_ASSERT_GT(a, b) RIEGELI_INTERNAL_EVAL_ASSERT_OP(Gt, a, b)
#define RIEGELI_EVAL_ASSERT_LE(a, b) RIEGELI_INTERNAL_EVAL_ASSERT_OP(Le, a, b)
#define RIEGELI_EVAL_ASSERT_GE(a, b) RIEGELI_INTERNAL_EVAL_ASSERT_OP(Ge, a, b)

#define RIEGELI_EVAL_ASSERT_NOTNULL(expr) \
  ::riegeli::assert_internal::EvalAssertNotNull(expr)

#define RIEGELI_EVAL_ASSERT_OK(status)                  \
  RIEGELI_INTERNAL_BLOCK_ELSE                           \
  if (::riegeli::assert_internal::EvalAssertOk(status)) \
    ;                                                   \
  else                                                  \
    ::riegeli::assert_internal::UnreachableStream()

#endif

// If `RIEGELI_DEBUG` is `true`, `RIEGELI_ASSUME*` macros are equivalent to the
// corresponding `RIEGELI_CHECK*` macros.
//
// If `RIEGELI_DEBUG` is `false`, the behavior is undefined if the assertion
// fails, which allows the compiler to perform optimizations based on that.
//
// The condition is evaluated unconditionally, but this should not be relied
// upon, as a future implementation might not ensure this. To make it optimized
// out when `RIEGELI_DEBUG` is `false`, it should use only operations which are
// expected to be optimized out when the result of the condition is not needed,
// in particular it should not call non-inline functions.

#if RIEGELI_DEBUG

// Debug builds: assumptions are full checks.
#define RIEGELI_ASSUME RIEGELI_CHECK
#define RIEGELI_ASSUME_EQ RIEGELI_CHECK_EQ
#define RIEGELI_ASSUME_NE RIEGELI_CHECK_NE
#define RIEGELI_ASSUME_LT RIEGELI_CHECK_LT
#define RIEGELI_ASSUME_GT RIEGELI_CHECK_GT
#define RIEGELI_ASSUME_LE RIEGELI_CHECK_LE
#define RIEGELI_ASSUME_GE RIEGELI_CHECK_GE
#define RIEGELI_ASSUME_OK RIEGELI_CHECK_OK
#define RIEGELI_ASSUME_NOTNULL RIEGELI_CHECK_NOTNULL
#define RIEGELI_ASSUME_UNREACHABLE RIEGELI_CHECK_UNREACHABLE

#else  // !RIEGELI_DEBUG

// Non-debug builds: a failing condition reaches
// `RIEGELI_ASSUME_UNREACHABLE()`, i.e. undefined behavior the optimizer may
// exploit.
#define RIEGELI_ASSUME(expr)  \
  RIEGELI_INTERNAL_BLOCK_ELSE \
  if (expr)                   \
    ;                         \
  else                        \
    RIEGELI_ASSUME_UNREACHABLE()

#define RIEGELI_ASSUME_EQ(a, b) RIEGELI_ASSUME((a) == (b))
#define RIEGELI_ASSUME_NE(a, b) RIEGELI_ASSUME((a) != (b))
#define RIEGELI_ASSUME_LT(a, b) RIEGELI_ASSUME((a) < (b))
#define RIEGELI_ASSUME_GT(a, b) RIEGELI_ASSUME((a) > (b))
#define RIEGELI_ASSUME_LE(a, b) RIEGELI_ASSUME((a) <= (b))
#define RIEGELI_ASSUME_GE(a, b) RIEGELI_ASSUME((a) >= (b))
#define RIEGELI_ASSUME_OK(status) RIEGELI_ASSUME((status).ok())

#define RIEGELI_ASSUME_NOTNULL(expr) \
  ::riegeli::assert_internal::AssumeNotNull(expr)

#define RIEGELI_ASSUME_UNREACHABLE() \
  ::riegeli::assert_internal::UnreachableStream()

#endif  // !RIEGELI_DEBUG

// Asserts that a region of memory is initialized, which is checked when running
// under memory sanitizer.
inline void AssertInitialized(ABSL_ATTRIBUTE_UNUSED const char* data, ABSL_ATTRIBUTE_UNUSED size_t size) { #ifdef MEMORY_SANITIZER __msan_check_mem_is_initialized(data, size); #endif } // Marks that a region of memory should be treated as uninitialized, which is // checked when running under memory sanitizer. inline void MarkPoisoned(ABSL_ATTRIBUTE_UNUSED const char* data, ABSL_ATTRIBUTE_UNUSED size_t size) { #ifdef MEMORY_SANITIZER __msan_poison(data, size); #endif } } // namespace riegeli #endif // RIEGELI_BASE_ASSERT_H_ ================================================ FILE: riegeli/base/background_cleaning.cc ================================================ // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "riegeli/base/background_cleaning.h" #include #include "absl/base/thread_annotations.h" #include "absl/synchronization/mutex.h" #include "absl/time/time.h" #include "riegeli/base/assert.h" #include "riegeli/base/parallelism.h" namespace riegeli { BackgroundCleanee::~BackgroundCleanee() = default; // Key method. 
BackgroundCleaner::Token BackgroundCleaner::Register( BackgroundCleanee* cleanee) { absl::MutexLock lock(mutex_); entries_.emplace_front(cleanee, absl::InfiniteFuture()); return Token(entries_.begin()); } void BackgroundCleaner::Unregister(Token token) { absl::MutexLock lock(mutex_); CancelCleaningInternal(token); if (next_ == token.iter()) ++next_; entries_.erase(token.iter()); } void BackgroundCleaner::CancelCleaning(Token token) { absl::MutexLock lock(mutex_); CancelCleaningInternal(token); if (token.iter()->deadline == absl::InfiniteFuture()) return; // Move `token.iter()` before `next_`. if (next_ == token.iter()) { ++next_; } else { entries_.splice(next_, entries_, token.iter()); } token.iter()->deadline = absl::InfiniteFuture(); } // Waits until this cleanee is not being cleaned. inline void BackgroundCleaner::CancelCleaningInternal(Token token) { struct Args { BackgroundCleanee** current_cleanee; BackgroundCleanee* cleanee_to_unregister; }; Args args{¤t_cleanee_, token.iter()->cleanee}; mutex_.Await(absl::Condition( +[](Args* args) { return *args->current_cleanee != args->cleanee_to_unregister; }, &args)); } void BackgroundCleaner::ScheduleCleaningSlow(Token token, absl::Time deadline) { absl::MutexLock lock(mutex_); if (token.iter()->deadline <= deadline) { // Cleaning is already scheduled with the same or earlier deadline. return; } // Move `token.iter()` to the right place after `next_`. Entries::iterator iter = token.iter()->deadline == absl::InfiniteFuture() ? entries_.end() // Schedule new cleaning: move from before `next_`. : token.iter(); // Reduce deadline: move backwards. for (;;) { if (iter == next_) { // Insert `token.iter()` before `iter` which is `next_`. next_ = token.iter(); deadline_reduced_ = true; break; } const Entries::iterator last_iter = iter; --iter; if (iter->deadline <= deadline) { // Insert `token.iter()` after `iter`, i.e. before `last_iter`. // This might be its old place, then `splice()` does nothing. 
iter = last_iter; break; } } entries_.splice(iter, entries_, token.iter()); RIEGELI_ASSERT(next_ != entries_.end()) << "next_ must cover at least token.iter()"; token.iter()->deadline = deadline; // Start a background thread if needed. if (!no_background_thread_) return; no_background_thread_ = false; internal::ThreadPool::global().Schedule([this] { absl::MutexLock lock(mutex_); BackgroundThread(); no_background_thread_ = true; }); } inline void BackgroundCleaner::BackgroundThread() ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_) { if (next_ == entries_.end()) return; for (;;) { // Wait until the next deadline. do { deadline_reduced_ = false; mutex_.AwaitWithDeadline(absl::Condition(&deadline_reduced_), next_->deadline); if (next_ == entries_.end()) return; } while (deadline_reduced_); // Schedule cleaning. for (;;) { const absl::Time now = TimeNow(); if (next_->deadline > now) break; BackgroundCleanee* const cleanee = next_->cleanee; next_->deadline = absl::InfiniteFuture(); ++next_; current_cleanee_ = cleanee; mutex_.unlock(); cleanee->Clean(now); mutex_.lock(); current_cleanee_ = nullptr; if (next_ == entries_.end()) return; } } } BackgroundCleaner::~BackgroundCleaner() { RIEGELI_CHECK(entries_.empty()) << "Failed precondition of BackgroundCleaner::~BackgroundCleaner(): " "some cleanees remain registered"; absl::MutexLock lock(mutex_); // Request the background thread to exit. deadline_reduced_ = true; mutex_.Await(absl::Condition(&no_background_thread_)); } } // namespace riegeli ================================================ FILE: riegeli/base/background_cleaning.h ================================================ // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_BACKGROUND_CLEANING_H_ #define RIEGELI_BASE_BACKGROUND_CLEANING_H_ #include #include "absl/base/thread_annotations.h" #include "absl/synchronization/mutex.h" #include "absl/time/clock.h" #include "absl/time/time.h" #include "riegeli/base/global.h" namespace riegeli { // An interface of objects which need background cleaning. class BackgroundCleanee { public: virtual ~BackgroundCleanee(); protected: friend class BackgroundCleaner; // For `Clean()`. // Called from a background thread when a scheduled cleaning time arrived. // // `now` is the current time, passed as a parameter so that there is no need // to call `BackgroundCleaner::TimeNow()` again if that time is needed to // decide what to clean. virtual void Clean(absl::Time now) = 0; }; // Manages objects which need background cleaning, scheduling cleaning calls // from a background thread. class BackgroundCleaner { private: struct Entry { explicit Entry(BackgroundCleanee* cleanee, absl::Time deadline) : cleanee(cleanee), deadline(deadline) {} BackgroundCleanee* cleanee; absl::Time deadline; }; using Entries = std::list; public: // Registration token of an object which needs background cleaning. class Token { public: Token() = default; Token(const Token& that) = default; Token& operator=(const Token& that) = default; private: friend class BackgroundCleaner; // For `Token()` and `iter()`. 
explicit Token(Entries::iterator iter) : iter_(iter) {} Entries::iterator iter() const { return iter_; } Entries::iterator iter_{}; }; BackgroundCleaner() = default; BackgroundCleaner(const BackgroundCleaner&) = delete; BackgroundCleaner& operator=(const BackgroundCleaner&) = delete; // Precondition: all registered cleanees have been unregistered. ~BackgroundCleaner(); // Returns a default global `BackgroundCleaner`. static BackgroundCleaner& global() { return Global([] { return BackgroundCleaner(); }); } // Registers the cleanee, allowing `ScheduleCleaning()` calls. // // Thread safe. Token Register(BackgroundCleanee* cleanee); // Unregisters the cleanee corresponding to `token`, invalidating `token` and // cancelling any pending cleaning. // // This might block if the cleanee is being cleaned or will be cleaned soon, // so this must not be called under a mutex needed for cleaning. // // Thread safe. void Unregister(Token token); // Cancels any pending cleaning corresponding to `token`. Does not unregister // the cleanee. // // This might block if the cleanee is being cleaned or will be cleaned soon, // so this must not be called under a mutex needed for cleaning. // // Thread safe. void CancelCleaning(Token token); // Schedules cleaning the cleanee corresponding to `token` at `deadline`. // // If `deadline` is `absl::InfiniteFuture()`, cleaning will never happen. // If `deadline` is in the past, cleaning will be scheduled immediately. // // If `ScheduleCleaning()` is called again for the same cleanee with a pending // cleaning, its deadline can be reduced, but extending the deadline has no // effect. // // Thread safe. void ScheduleCleaning(Token token, absl::Time deadline); // Returns the current time according to the same clock that // `BackgroundCleaner` is using. // // Thread safe. 
absl::Time TimeNow(); private: void CancelCleaningInternal(Token token) ABSL_EXCLUSIVE_LOCKS_REQUIRED(mutex_); void ScheduleCleaningSlow(Token token, absl::Time deadline); void BackgroundThread(); absl::Mutex mutex_; // Registered cleanees, partitioned so that `entries_` before `next_` do not // have pending cleaning and have `deadline == absl::InfiniteFuture()`, while // `entries_` at and after `next_` have pending cleaning and are sorted by // their `deadline` which is never `absl::InfiniteFuture()`. Entries entries_ ABSL_GUARDED_BY(mutex_); Entries::iterator next_ ABSL_GUARDED_BY(mutex_) = entries_.begin(); // If not `nullptr`, this cleanee is currently being cleaned. This is used to // avoid a race between `Unregister()` and cleaning. BackgroundCleanee* current_cleanee_ ABSL_GUARDED_BY(mutex_) = nullptr; // If `true`, the next deadline might have been reduced since the background // thread started waiting for it. This wakes up the thread and lets it recheck // the next deadline. // // This is also used to request the thread to exit when `next_ == next_end()`. bool deadline_reduced_ ABSL_GUARDED_BY(mutex_) = false; // If `false`, the background thread is active. This is negated for easier // `absl::Condition()`. bool no_background_thread_ ABSL_GUARDED_BY(mutex_) = true; }; // Implementation details follow. inline void BackgroundCleaner::ScheduleCleaning(Token token, absl::Time deadline) { if (deadline == absl::InfiniteFuture()) return; ScheduleCleaningSlow(token, deadline); } inline absl::Time BackgroundCleaner::TimeNow() { return absl::Now(); } } // namespace riegeli #endif // RIEGELI_BASE_BACKGROUND_CLEANING_H_ ================================================ FILE: riegeli/base/binary_search.h ================================================ // Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_BINARY_SEARCH_H_ #define RIEGELI_BASE_BINARY_SEARCH_H_ #include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/status/status.h" #include "absl/status/statusor.h" #include "riegeli/base/compare.h" namespace riegeli { // Explains the result of a binary search. // // Assumptions: // * All `less` positions precede all `equivalent` positions. // * All `equivalent` positions precede all `greater` positions. // * All `less` positions precede all `greater` positions, // even if there are no `equivalent` positions. // // Interpretation of the result of a binary search, depending on `ordering`: // * `equivalent` - There is some `equivalent` position, // and `found` is some such position. // * `greater` - There are no `equivalent` positions // but there is some `greater` position, // and `found` is the earliest such position. // * `less` - There are no `equivalent` nor `greater` positions // but there is some `less` position, // and `found` is the end of the range to search. // * `unordered` - All positions are `unordered`, // and `found` is the end of the range to search. template struct SearchResult { PartialOrdering ordering; Pos found; }; // The `test()` parameter of `BinarySearch()` is a function which returns // an ordering (a value comparable with literal 0, such as // `{Partial,Strong}Ordering`, `{std,absl}::{partial,weak,strong}_ordering`, // or `int`) or `SearchGuide`. 
// // If the earliest interesting position after `current` can be found // independently from `test(current)`, `test(current)` can return an ordering. // The next position will be `traits.Next(current)`. // // If the earliest interesting position after `current` can be more easily found // as a side effect of `test(current)`, `test(current)` can return // `SearchGuide`. If `ordering >= 0` (i.e. `ordering` is `equivalent` or // `greater`), the associated `next` should be `current` (or another position // to replace `current` with). Otherwise (i.e. `ordering` is `less` or // `unordered`), the associated `next` should be the earliest interesting // position after `current`. template struct SearchGuide { template ::value, int> = 0> explicit SearchGuide(Ordering ordering, const Pos& next) : ordering(AsPartialOrdering(ordering)), next(next) {} template ::value, int> = 0> explicit SearchGuide(Ordering ordering, Pos&& next) : ordering(AsPartialOrdering(ordering)), next(std::move(next)) {} SearchGuide(const SearchGuide& that) = default; SearchGuide& operator=(const SearchGuide& that) = default; SearchGuide(SearchGuide&& that) = default; SearchGuide& operator=(SearchGuide&& that) = default; template , int> = 0> /*implicit*/ SearchGuide(const SearchGuide& that) : ordering(that.ordering), next(that.next) {} template , int> = 0> SearchGuide& operator=(const SearchGuide& that) { ordering = that.ordering; next = that.next; return *this; } template , int> = 0> /*implicit*/ SearchGuide(SearchGuide&& that) : ordering(that.ordering), next(std::move(that.next)) {} template , int> = 0> SearchGuide& operator=(SearchGuide&& that) { ordering = that.ordering; next = std::move(that.next); return *this; } PartialOrdering ordering; Pos next; }; template explicit SearchGuide(Ordering ordering, Pos next) -> SearchGuide>; namespace binary_search_internal { template struct IsSearchGuide : std::false_type {}; template struct IsSearchGuide, Pos> : std::is_convertible {}; template struct 
IsOrderingOrSearchGuide : std::disjunction, IsSearchGuide> {}; template struct IsOptionalOrderingOrSearchGuide : std::false_type {}; template struct IsOptionalOrderingOrSearchGuide, Pos> : IsOrderingOrSearchGuide {}; template struct IsStatusOrOrderingOrSearchGuide : std::false_type {}; template struct IsStatusOrOrderingOrSearchGuide, Pos> : IsOrderingOrSearchGuide {}; template struct TestReturnsOrderingOrSearchGuide : std::false_type {}; template struct TestReturnsOrderingOrSearchGuide< Test, Pos, std::enable_if_t()(std::declval())), Pos>::value>> : std::true_type {}; template struct TestReturnsOptionalOrderingOrSearchGuide : std::false_type {}; template struct TestReturnsOptionalOrderingOrSearchGuide< Test, Pos, std::enable_if_t()(std::declval())), Pos>::value>> : std::true_type {}; template struct TestReturnsStatusOrOrderingOrSearchGuide : std::false_type {}; template struct TestReturnsStatusOrOrderingOrSearchGuide< Test, Pos, std::enable_if_t()(std::declval())), Pos>::value>> : std::true_type {}; } // namespace binary_search_internal // Searches a sequence of elements for a desired element, or for a desired // position between elements, given that it is possible to determine whether a // given position is before or after the desired position. // // The `traits` parameter specifies the space of possible positions. // See `DefaultSearchTraits` documentation for details. The default `traits` are // `DefaultSearchTraits()`. // // The `low` (inclusive) and `high` (exclusive) parameters specify the range to // search. // // The `test()` function takes `current` of type `Traits::Pos` as a parameter // and returns an ordering: // * `less` - `current` is before the desired position. // * `equivalent` - `current` is desired, searching can stop. // * `greater` - `current` is after the desired position. // * `unordered` - It could not be determined which is the case. `current` // will be skipped. // // Alternatively, `test()` can return `SearchGuide`. 
See // `SearchGuide` documentation for details. // // Preconditions: // * All `less` positions precede all `equivalent` positions. // * All `equivalent` positions precede all `greater` positions. // * All `less` positions precede all `greater` positions, // even if there are no `equivalent` positions. // // For interpretation of the result, see `SearchResult` documentation. // // To find the earliest `equivalent` position instead of an arbitrary one, // `test()` can be changed to return `greater` in place of `equivalent`. // // Further guarantees: // * Each `traits.Next(current)` immediately follows a `test(current)` which // returned `less` or `unordered`. // * Each `test(current)` immediately follows a `traits.Next()` which returned // `current`, or a `test()` which returned a `SearchGuide` containing `less` // or `unordered` together with `current`, or a `traits.Middle()` which // returned `current`. // * If `test(current)` returns `equivalent`, `BinarySearch()` immediately // returns `current`. // * If `test(current)` returns `less`, `test()` will not be called again // with arguments before `current`. // * If `test(current)` returns `greater`, `test()` will not be called again // with arguments after `current`. // * `test()` will not be called again with the same argument. // // It follows that if a `test()` returns `equivalent` or `greater`, // `BinarySearch()` returns the argument of the last `test()` call with one of // these results. This allows to communicate additional context of an // `equivalent` or `greater` result by a side effect of `test()`. template < typename Pos, typename Test, std::enable_if_t::value, int> = 0> SearchResult BinarySearch(Pos low, Pos high, Test&& test); template < typename Traits, typename Test, std::enable_if_t::value, int> = 0> SearchResult BinarySearch(typename Traits::Pos low, typename Traits::Pos high, Test&& test, const Traits& traits); // A variant of `BinarySearch()` which supports cancellation. 
// // If a `test()` returns `std::nullopt`, `BinarySearch()` returns // `std::nullopt`. template ::value, int> = 0> std::optional> BinarySearch(Pos low, Pos high, Test&& test); template ::value, int> = 0> std::optional> BinarySearch( typename Traits::Pos low, typename Traits::Pos high, Test&& test, const Traits& traits); // A variant of `BinarySearch()` which supports cancellation with a `Status`. // // If a `test()` returns a failed `absl::StatusOr`, `BinarySearch()` returns // the corresponding failed `absl::StatusOr`. template ::value, int> = 0> absl::StatusOr> BinarySearch(Pos low, Pos high, Test&& test); template ::value, int> = 0> absl::StatusOr> BinarySearch( typename Traits::Pos low, typename Traits::Pos high, Test&& test, const Traits& traits); // The `traits` parameter of `BinarySearch()` specifies the space of positions // to search. // // Some positions might be determined to be uninteresting, which means that for // the purposes of the search they are equivalent to a nearby interesting // position. They are skipped during the search. // // `DefaultSearchTraits` might be appropriate for positions of an // arithmetic type. If custom traits are needed instead, these comments specify // generalized requirements of the traits. template class DefaultSearchTraits { public: // Identifies a position between elements being searched. This type must be // copyable. using Pos = T; // Returns the earliest interesting position after `current`. // // `Next()` is used only if the `test()` parameter of `BinarySearch()` returns // an ordering. If `test()` returns `SearchGuide`, the result of `test()` // provides the next position instead. // // Precondition: `test(current)` returned `less` or `unordered`. T Next(T current) const { return current + 1; } // Returns `true` if the range between `low` and `high` contains no positions. 
bool Empty(T low, T high) const { return low >= high; } // Returns a position in the range from `low` (inclusive) to `high` // (exclusive) which is approximately halfway between `low` and `high`. // Returns `std::nullopt` if the range contains no interesting positions. std::optional Middle(T low, T high) const { if (low >= high) return std::nullopt; return low + (high - low) / 2; } }; // Implementation details follow. namespace binary_search_internal { template ::value, int> = 0> inline SearchGuide GetSearchGuide( Ordering ordering, typename Traits::Pos&& pos, const Traits& traits) { return SearchGuide( AsPartialOrdering(ordering), ordering >= 0 ? std::move(pos) : traits.Next(std::move(pos))); } template inline SearchGuide GetSearchGuide( SearchGuide&& guide, ABSL_ATTRIBUTE_UNUSED typename Traits::Pos&& pos, ABSL_ATTRIBUTE_UNUSED const Traits& traits) { return std::move(guide); } template struct CancelSearch; template struct CancelSearch::value>> { static PartialOrdering DoCancel(ABSL_ATTRIBUTE_UNUSED const Pos& pos) { return PartialOrdering::equivalent; } static PartialOrdering DoNotCancel(Ordering ordering) { return AsPartialOrdering(ordering); } }; template struct CancelSearch> { static SearchGuide DoCancel(const Pos& pos) { return SearchGuide(PartialOrdering::equivalent, pos); } static SearchGuide DoNotCancel(SearchGuide&& guide) { return std::move(guide); } }; } // namespace binary_search_internal template < typename Pos, typename Test, std::enable_if_t::value, int>> inline SearchResult BinarySearch(Pos low, Pos high, Test&& test) { return BinarySearch(std::move(low), std::move(high), std::forward(test), DefaultSearchTraits()); } template < typename Traits, typename Test, std::enable_if_t::value, int>> inline SearchResult BinarySearch( typename Traits::Pos low, typename Traits::Pos high, Test&& test, const Traits& traits) { // Invariants: // * All positions between the original `low` and the current `low` are // `less` or `unordered`. 
// * All positions between the current `high` and the original `high` are // `greater` or `unordered`. // // Invariants depending on `greater_result.ordering`: // * `greater` - `greater_result.found` is the first `greater` position // between the current `high` and the original `high`. // * `less` - There are no such positions but there are `less` positions // between the original `low` and the current `low`, // and `greater_result.found` is `high`. // * `unordered` - There are no such positions either, // and `greater_result.found` is `high`. using Pos = typename Traits::Pos; SearchResult greater_result = {PartialOrdering::unordered, high}; again: std::optional middle_before_unordered = traits.Middle(low, high); if (middle_before_unordered == std::nullopt) return greater_result; Pos middle = *middle_before_unordered; // Invariant: all positions between `*middle_before_unordered` and `middle` // are `unordered`. bool unordered_found = false; for (;;) { auto test_result = test(middle); SearchGuide guide = binary_search_internal::GetSearchGuide( std::move(test_result), std::move(middle), traits); if (guide.ordering < 0) { if (!(greater_result.ordering >= 0)) { greater_result.ordering = PartialOrdering::less; } low = std::move(guide.next); goto again; } if (guide.ordering == 0) { // Assign instead of returning for NRVO. greater_result.ordering = PartialOrdering::equivalent; greater_result.found = std::move(guide.next); return greater_result; } if (guide.ordering > 0) { greater_result.ordering = PartialOrdering::greater; greater_result.found = std::move(guide.next); if (unordered_found) break; // Use the position from `guide` instead of `*middle_before_unordered` // in case the guide provides an earlier upper bound. 
high = greater_result.found; goto again; } unordered_found = true; if (traits.Empty(guide.next, high)) break; middle = std::move(guide.next); } // Either a `greater` position was found after some `unordered` positions, // or all positions between `*middle_before_unordered` and `high` are // `unordered`. high = *std::move(middle_before_unordered); goto again; } template ::value, int>> inline std::optional> BinarySearch(Pos low, Pos high, Test&& test) { return BinarySearch(std::move(low), std::move(high), std::forward(test), DefaultSearchTraits()); } template ::value, int>> inline std::optional> BinarySearch( typename Traits::Pos low, typename Traits::Pos high, Test&& test, const Traits& traits) { bool cancelled = false; SearchResult result = BinarySearch( std::move(low), std::move(high), [&](const typename Traits::Pos& pos) { auto test_result = test(pos); using Cancel = binary_search_internal::CancelSearch< typename Traits::Pos, std::decay_t>; if (ABSL_PREDICT_FALSE(test_result == std::nullopt)) { cancelled = true; return Cancel::DoCancel(pos); } return Cancel::DoNotCancel(*std::move(test_result)); }, traits); if (ABSL_PREDICT_FALSE(cancelled)) return std::nullopt; return result; } template ::value, int>> inline absl::StatusOr> BinarySearch(Pos low, Pos high, Test&& test) { return BinarySearch(std::move(low), std::move(high), std::forward(test), DefaultSearchTraits()); } template ::value, int>> inline absl::StatusOr> BinarySearch( typename Traits::Pos low, typename Traits::Pos high, Test&& test, const Traits& traits) { absl::Status status; SearchResult result = BinarySearch( std::move(low), std::move(high), [&](const typename Traits::Pos& pos) { auto test_result = test(pos); using Cancel = binary_search_internal::CancelSearch< typename Traits::Pos, std::decay_t>; if (ABSL_PREDICT_FALSE(!test_result.ok())) { status = test_result.status(); return Cancel::DoCancel(pos); } return Cancel::DoNotCancel(*std::move(test_result)); }, traits); if 
(ABSL_PREDICT_FALSE(!status.ok())) return status; return result; } } // namespace riegeli #endif // RIEGELI_BASE_BINARY_SEARCH_H_ ================================================ FILE: riegeli/base/buffer.cc ================================================ // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "riegeli/base/buffer.h" #include #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" namespace riegeli { void Buffer::DumpStructure(absl::string_view substr, std::ostream& dest) const { dest << "[buffer] {"; if (!substr.empty()) { if (substr.data() != data()) { dest << " space_before: " << PtrDistance(data(), substr.data()); } dest << " space_after: " << PtrDistance(substr.data() + substr.size(), data() + capacity()); } dest << " }"; } } // namespace riegeli ================================================ FILE: riegeli/base/buffer.h ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_BUFFER_H_ #define RIEGELI_BASE_BUFFER_H_ #include #include #include // IWYU pragma: keep #include #include "absl/base/attributes.h" #include "absl/strings/string_view.h" #include "riegeli/base/assert.h" #include "riegeli/base/buffering.h" #include "riegeli/base/estimated_allocated_size.h" #include "riegeli/base/external_data.h" namespace riegeli { // Dynamically allocated byte buffer. class ABSL_ATTRIBUTE_TRIVIAL_ABI Buffer { public: Buffer() = default; // Ensures at least `min_capacity` of space. explicit Buffer(size_t min_capacity); // The source `Buffer` is left deallocated. Buffer(Buffer&& that) noexcept; Buffer& operator=(Buffer&& that) noexcept; ~Buffer() { DeleteInternal(); } // Ensures at least `min_capacity` of space. Existing contents are lost. // // Drops the allocation if the resulting capacity would be wasteful for // `min_capacity`. ABSL_ATTRIBUTE_REINITIALIZES void Reset(size_t min_capacity = 0); // Returns the data pointer. char* data() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return data_; } // Returns the usable data size. It can be greater than the requested size. size_t capacity() const { return capacity_; } // Indicates support for `ExternalRef(Buffer&&, substr)`. friend void RiegeliSupportsExternalRef(Buffer*) {} // Supports `ExternalRef`. friend ExternalStorage RiegeliToExternalStorage(Buffer* self) { self->capacity_ = 0; return ExternalStorage(std::exchange(self->data_, nullptr), [](void* ptr) { operator delete(ptr); }); } // Supports `ExternalRef` and `Chain::Block`. friend void RiegeliDumpStructure(const Buffer* self, absl::string_view substr, std::ostream& dest) { self->DumpStructure(substr, dest); } // Supports `MemoryEstimator`. 
template friend void RiegeliRegisterSubobjects(const Buffer* self, MemoryEstimator& memory_estimator) { memory_estimator.RegisterDynamicMemory(self->data_, self->capacity_); } private: void AllocateInternal(size_t min_capacity); void DeleteInternal() { operator delete(data_, capacity_); } void DumpStructure(absl::string_view substr, std::ostream& dest) const; char* data_ = nullptr; size_t capacity_ = 0; // Invariant: if `data_ == nullptr` then `capacity_ == 0` }; // Implementation details follow. inline Buffer::Buffer(size_t min_capacity) { AllocateInternal(min_capacity); } inline Buffer::Buffer(Buffer&& that) noexcept : data_(std::exchange(that.data_, nullptr)), capacity_(std::exchange(that.capacity_, 0)) {} inline Buffer& Buffer::operator=(Buffer&& that) noexcept { // Exchange `that.data_` early to support self-assignment. char* const data = std::exchange(that.data_, nullptr); DeleteInternal(); data_ = data; capacity_ = std::exchange(that.capacity_, 0); return *this; } inline void Buffer::Reset(size_t min_capacity) { if (data_ != nullptr) { if (capacity_ >= min_capacity && !Wasteful(capacity_, min_capacity)) return; DeleteInternal(); data_ = nullptr; capacity_ = 0; } AllocateInternal(min_capacity); } inline void Buffer::AllocateInternal(size_t min_capacity) { if (min_capacity > 0) { const size_t capacity = EstimatedAllocatedSize(min_capacity); data_ = static_cast(operator new(capacity)); capacity_ = capacity; } } } // namespace riegeli #endif // RIEGELI_BASE_BUFFER_H_ ================================================ FILE: riegeli/base/buffering.h ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_BUFFERING_H_ #define RIEGELI_BASE_BUFFERING_H_ #include #include #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/types.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // Typical bounds of sizes of memory blocks holding pieces of data in objects. inline constexpr size_t kDefaultMinBlockSize = 512; inline constexpr size_t kDefaultMaxBlockSize = size_t{64} << 10; // When deciding whether to copy an array of bytes or share memory, prefer // copying up to this length. // // Copying can often be done in an inlined fast path. Sharing has more overhead, // especially in a virtual slow path, so copying sufficiently short lengths // performs better. inline constexpr size_t kMaxBytesToCopy = 511; // Recommends the length of a buffer by modifying the base recommendation. // // If `pos` did not pass `size_hint` yet, returns the remaining length instead // of `base_length`. inline Position ApplySizeHint(Position base_length, std::optional size_hint, Position pos) { if (size_hint != std::nullopt && pos <= *size_hint) return *size_hint - pos; return base_length; } // Recommends the length of a buffer by modifying the base recommendation. // // The following constraints are applied, in the order of weakest to strongest: // * At least `recommended_length`. // * At most `max_length`. // * At least `min_length`. 
inline size_t ApplyBufferConstraints(Position base_length, size_t min_length, size_t recommended_length, size_t max_length) { return UnsignedClamp(UnsignedMax(base_length, recommended_length), min_length, max_length); } // Heuristics for whether a data structure with `allocated` bytes utilizing // `used` bytes for actual data is considered wasteful: `allocated` is larger // than `2 * used + kDefaultMinBlockSize` (512). inline bool Wasteful(size_t allocated, size_t used) { if (ABSL_PREDICT_FALSE(used > allocated)) return false; const size_t unused = allocated - used; if (ABSL_PREDICT_TRUE(unused <= kDefaultMinBlockSize)) return false; return unused - riegeli::kDefaultMinBlockSize > used; } } // namespace riegeli #endif // RIEGELI_BASE_BUFFERING_H_ ================================================ FILE: riegeli/base/byte_fill.cc ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "riegeli/base/byte_fill.h" #include #include #include #include #include #include #include #include #include "absl/numeric/bits.h" #include "absl/strings/cord.h" #include "absl/strings/cord_buffer.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/chain.h" #include "riegeli/base/cord_utils.h" #include "riegeli/base/external_ref.h" #include "riegeli/base/global.h" #include "riegeli/base/shared_buffer.h" #include "riegeli/base/types.h" namespace riegeli { inline const char* ByteFill::ZeroBlock::Data() { return Global([] { return new char[kSize](); }); } Chain::Block ByteFill::ZeroBlock::ToChainBlock(absl::string_view substr) { if (substr.size() == kSize) { return Global([] { return Chain::Block(ZeroBlock(), absl::string_view(Data(), kSize)); }); } return Chain::Block(ZeroBlock(), substr); } absl::Cord ByteFill::ZeroBlock::ToCord(absl::string_view substr) { static constexpr auto kNullReleaser = [] {}; if (substr.size() == kSize) { return Global([] { return absl::MakeCordFromExternal(absl::string_view(Data(), kSize), kNullReleaser); }); } return absl::MakeCordFromExternal(substr, kNullReleaser); } void ByteFill::ZeroBlock::DumpStructure(std::ostream& dest) { dest << "[zero_fill] { }"; } void ByteFill::LargeBlock::DumpStructure(absl::string_view substr, std::ostream& dest) const { dest << "[large_fill] {"; const size_t ref_count = buffer_.GetRefCount(); if (ref_count != 1) dest << " ref_count: " << ref_count; if (buffer_.capacity() != substr.size()) { dest << " capacity: " << buffer_.capacity(); } dest << " }"; } ByteFill::Blocks::Blocks(Position size, char fill) { if (size == 0) return; if (fill == '\0') { RIEGELI_ASSERT(std::holds_alternative(block_)); num_blocks_ = (size - 1) / ZeroBlock::kSize + 1; non_last_block_size_ = uint32_t{ZeroBlock::kSize}; last_block_size_ = static_cast(size - 1) % uint32_t{ZeroBlock::kSize} + 1; data_ = ZeroBlock::Data(); 
return; } if (size <= SmallBlock::kSize) { num_blocks_ = 1; last_block_size_ = IntCast(size); data_ = block_.emplace(fill).data(); return; } if (size <= kMaxSizeForSingleBlock) { num_blocks_ = 1; non_last_block_size_ = IntCast(size); last_block_size_ = non_last_block_size_; } else { const int block_size_bits = SignedMin( (kBlockSizeBitsBias + IntCast(absl::bit_width(size))) / 2, 16); num_blocks_ = ((size - 1) >> block_size_bits) + 1; non_last_block_size_ = uint32_t{1} << block_size_bits; last_block_size_ = (static_cast(size - 1) & (non_last_block_size_ - 1)) + 1; } data_ = block_.emplace(non_last_block_size_, fill).data(); } ByteFill::operator Chain() const { Chain dest; if (size_ <= (fill_ == '\0' ? Chain::kMaxBytesToCopyToEmpty : Blocks::kMaxSizeForSingleBlock)) { if (size_ > 0) { const absl::Span buffer = dest.AppendFixedBuffer( IntCast(size_), Chain::Options().set_size_hint(IntCast(size_))); std::memset(buffer.data(), fill_, buffer.size()); } } else { RIEGELI_CHECK_LE(size_, std::numeric_limits::max()) << "Chain size overflow"; Chain::Options options; options.set_size_hint(IntCast(size_)); const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { dest.Append(*iter, options); } while (++iter != blocks.cend()); } return dest; } ByteFill::operator absl::Cord() const { absl::Cord dest; if (size_ <= UnsignedMin(fill_ == '\0' ? 
cord_internal::kMaxBytesToCopyToEmptyCord : Blocks::kMaxSizeForSingleBlock, absl::CordBuffer::kDefaultLimit)) { if (size_ > 0) { absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(IntCast(size_)); buffer.SetLength(IntCast(size_)); std::memset(buffer.data(), fill_, IntCast(size_)); dest.Append(std::move(buffer)); } } else { RIEGELI_CHECK_LE(size_, std::numeric_limits::max()) << "Cord size overflow"; const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { ExternalRef(*iter).AppendTo(dest); } while (++iter != blocks.cend()); } return dest; } void ByteFill::AssignTo(Chain& dest) const { dest.Clear(); if (size_ <= (fill_ == '\0' ? Chain::kMaxBytesToCopyToEmpty : Blocks::kMaxSizeForSingleBlock)) { if (empty()) return; const absl::Span buffer = dest.AppendFixedBuffer( IntCast(size_), Chain::Options().set_size_hint(IntCast(size_))); std::memset(buffer.data(), fill_, buffer.size()); } else { RIEGELI_CHECK_LE(size_, std::numeric_limits::max()) << "Chain size overflow"; Chain::Options options; options.set_size_hint(IntCast(size_)); const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { dest.Append(*iter, options); } while (++iter != blocks.cend()); } } void ByteFill::AssignTo(absl::Cord& dest) const { if (size_ <= UnsignedMin(fill_ == '\0' ? 
cord_internal::kMaxBytesToCopyToEmptyCord : Blocks::kMaxSizeForSingleBlock, absl::CordBuffer::kDefaultLimit)) { if (size_ == 0) { dest.Clear(); } else { absl::CordBuffer buffer = dest.GetAppendBuffer(0, 0); dest.Clear(); if (buffer.capacity() < IntCast(size_)) { buffer = absl::CordBuffer::CreateWithDefaultLimit(IntCast(size_)); } buffer.SetLength(IntCast(size_)); std::memset(buffer.data(), fill_, IntCast(size_)); dest.Append(std::move(buffer)); } } else { dest.Clear(); RIEGELI_CHECK_LE(size_, std::numeric_limits::max()) << "Cord size overflow"; const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { ExternalRef(*iter).AppendTo(dest); } while (++iter != blocks.cend()); } } void ByteFill::AppendTo(Chain& dest) const { if (size_ <= (fill_ == '\0' ? dest.MaxBytesToCopy() : Blocks::kMaxSizeForSingleBlock)) { size_t length = IntCast(size_); while (length > 0) { const absl::Span buffer = dest.AppendBuffer(1, length, length); std::memset(buffer.data(), fill_, buffer.size()); length -= buffer.size(); } } else { const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { dest.Append(*iter); } while (++iter != blocks.cend()); } } void ByteFill::AppendTo(Chain& dest, Chain::Options options) const { if (size_ <= (fill_ == '\0' ? 
dest.MaxBytesToCopy(options) : Blocks::kMaxSizeForSingleBlock)) { size_t length = IntCast(size_); while (length > 0) { const absl::Span buffer = dest.AppendBuffer(1, length, length, options); std::memset(buffer.data(), fill_, buffer.size()); length -= buffer.size(); } } else { const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { dest.Append(*iter, options); } while (++iter != blocks.cend()); } } void ByteFill::AppendTo(absl::Cord& dest) const { if (size_ <= UnsignedMin(fill_ == '\0' ? cord_internal::MaxBytesToCopyToCord(dest) : Blocks::kMaxSizeForSingleBlock, absl::CordBuffer::kDefaultLimit)) { size_t length = IntCast(size_); if (length == 0) return; { absl::CordBuffer buffer = dest.GetAppendBuffer(0, 1); const size_t existing_length = buffer.length(); if (existing_length > 0) { buffer.SetLength( UnsignedMin(existing_length + length, buffer.capacity())); std::memset(buffer.data() + existing_length, fill_, buffer.length() - existing_length); length -= buffer.length() - existing_length; dest.Append(std::move(buffer)); if (length == 0) return; } } absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(length); buffer.SetLength(length); std::memset(buffer.data(), fill_, length); dest.Append(std::move(buffer)); } else { RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - dest.size()) << "Cord size overflow"; const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cbegin(); RIEGELI_ASSERT(iter != blocks.cend()) << "Empty ByteFill handled above"; do { ExternalRef(*iter).AppendTo(dest); } while (++iter != blocks.cend()); } } void ByteFill::PrependTo(Chain& dest) const { if (size_ <= (fill_ == '\0' ? 
dest.MaxBytesToCopy() : Blocks::kMaxSizeForSingleBlock)) { size_t length = IntCast(size_); while (length > 0) { const absl::Span buffer = dest.PrependBuffer(1, length, length); std::memset(buffer.data(), fill_, buffer.size()); length -= buffer.size(); } } else { const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cend(); RIEGELI_ASSERT(iter != blocks.cbegin()) << "Empty ByteFill handled above"; do { --iter; dest.Prepend(*iter); } while (iter != blocks.cbegin()); } } void ByteFill::PrependTo(Chain& dest, Chain::Options options) const { if (size_ <= (fill_ == '\0' ? dest.MaxBytesToCopy(options) : Blocks::kMaxSizeForSingleBlock)) { size_t length = IntCast(size_); while (length > 0) { const absl::Span buffer = dest.PrependBuffer(1, length, length, options); std::memset(buffer.data(), fill_, buffer.size()); length -= buffer.size(); } } else { const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cend(); RIEGELI_ASSERT(iter != blocks.cbegin()) << "Empty ByteFill handled above"; do { --iter; dest.Prepend(*iter, options); } while (iter != blocks.cbegin()); } } void ByteFill::PrependTo(absl::Cord& dest) const { if (size_ <= UnsignedMin(fill_ == '\0' ? 
cord_internal::MaxBytesToCopyToCord(dest) : Blocks::kMaxSizeForSingleBlock, absl::CordBuffer::kDefaultLimit)) { if (empty()) return; absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(IntCast(size_)); buffer.SetLength(IntCast(size_)); std::memset(buffer.data(), fill_, IntCast(size_)); dest.Prepend(std::move(buffer)); } else { RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - dest.size()) << "Cord size overflow"; const Blocks blocks = this->blocks(); BlockIterator iter = blocks.cend(); RIEGELI_ASSERT(iter != blocks.cbegin()) << "Empty ByteFill handled above"; do { --iter; ExternalRef(*iter).PrependTo(dest); } while (iter != blocks.cbegin()); } } void ByteFill::Output(std::ostream& dest) const { for (const absl::string_view fragment : blocks()) { dest.write(fragment.data(), IntCast(fragment.size())); } } } // namespace riegeli ================================================ FILE: riegeli/base/byte_fill.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_BYTE_FILL_H_ #define RIEGELI_BASE_BYTE_FILL_H_ #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/strings/cord.h" #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/chain.h" #include "riegeli/base/compare.h" #include "riegeli/base/external_data.h" #include "riegeli/base/iterable.h" #include "riegeli/base/shared_buffer.h" #include "riegeli/base/types.h" namespace riegeli { // Represents a byte sequence of the given size with all bytes equal to the // given value. class ByteFill { public: class BlockRef; class BlockIterator; class Blocks; // Constructs a `ByteFill` with `size` occurrences of `fill`. explicit ByteFill(Position size, char fill = '\0') : size_(size), fill_(fill) {} ByteFill(const ByteFill& that) = default; ByteFill& operator=(const ByteFill& that) = default; bool empty() const { return size() == 0; } Position size() const { return size_; } char fill() const { return fill_; } // Removes `difference` occurrences, and returns `ByteFill` corresponding // to the removed fragment. // // Precondition: `difference <= size()` ByteFill Extract(Position difference) { RIEGELI_ASSERT_LE(difference, size_) << "Failed precondition of ByteFill::Extract(): size underflow"; size_ -= difference; return ByteFill(difference, fill_); } // A sequence of non-empty `absl::string_view` blocks comprising data of the // `ByteFill`. Blocks blocks() const; // Converts the data to `Chain`. explicit operator Chain() const; // Converts the data to `absl::Cord`. explicit operator absl::Cord() const; // Supports `riegeli::Reset(Chain&, ByteFill)`. friend void RiegeliReset(Chain& dest, ByteFill src) { src.AssignTo(dest); } // Supports `riegeli::Reset(absl::Cord&, ByteFill)`. friend void RiegeliReset(absl::Cord& dest, ByteFill src) { src.AssignTo(dest); } // Appends the data to `dest`. 
void AppendTo(Chain& dest) const; void AppendTo(Chain& dest, Chain::Options options) const; // Appends the data to `dest`. void AppendTo(absl::Cord& dest) const; // Prepends the data to `dest`. void PrependTo(Chain& dest) const; void PrependTo(Chain& dest, Chain::Options options) const; // Prepends the data to `dest`. void PrependTo(absl::Cord& dest) const; // Default stringification by `absl::StrCat()` etc. template friend void AbslStringify(Sink& dest, ByteFill src) { Position length = src.size_; while (ABSL_PREDICT_FALSE(length > std::numeric_limits::max())) { dest.Append(std::numeric_limits::max(), src.fill_); length -= std::numeric_limits::max(); } if (length > 0) dest.Append(IntCast(length), src.fill_); } // Writes the occurrences to `out` as unformatted bytes. friend std::ostream& operator<<(std::ostream& dest, ByteFill src) { src.Output(dest); return dest; } private: class ZeroBlock; class SmallBlock; class LargeBlock; void AssignTo(Chain& dest) const; void AssignTo(absl::Cord& dest) const; void Output(std::ostream& dest) const; Position size_; char fill_; }; // Represents a block of zeros backed by a shared array for `ExternalRef`. class ByteFill::ZeroBlock { public: static constexpr size_t kSize = size_t{64} << 10; static const char* Data(); ZeroBlock() = default; ZeroBlock(const ZeroBlock& that) = default; ZeroBlock& operator=(const ZeroBlock& that) = default; // Supports `ExternalRef`. friend Chain::Block RiegeliToChainBlock( ABSL_ATTRIBUTE_UNUSED const ZeroBlock* self, absl::string_view substr) { return ToChainBlock(substr); } // Supports `ExternalRef`. friend absl::Cord RiegeliToCord(ABSL_ATTRIBUTE_UNUSED const ZeroBlock* self, absl::string_view substr) { return ToCord(substr); } // Supports `ExternalRef`. friend ExternalStorage RiegeliToExternalStorage( ABSL_ATTRIBUTE_UNUSED const ZeroBlock* self) { return ExternalStorage(nullptr, [](ABSL_ATTRIBUTE_UNUSED void* ptr) {}); } // Supports `ExternalRef` and `Chain::Block`. 
// A fill block of `kSize` bytes, all equal to the `fill` character, stored
// inline in the object (no heap allocation).
class ByteFill::SmallBlock {
 public:
  // Size of the inline array; the constructor always fills it completely.
  static constexpr size_t kSize = 64;

  explicit SmallBlock(char fill) { std::memset(data_, fill, kSize); }

  SmallBlock(const SmallBlock& that) = default;
  SmallBlock& operator=(const SmallBlock& that) = default;

  const char* data() const { return data_; }

  // Supports `ExternalRef`. NOTE(review): returning `true` presumably tells
  // `ExternalRef` that these few bytes should be copied rather than shared —
  // confirm against the `ExternalRef` support protocol in external_ref.h.
  friend bool RiegeliExternalCopy(
      ABSL_ATTRIBUTE_UNUSED const SmallBlock* self) {
    return true;
  }

 private:
  char data_[kSize];
};
template friend void RiegeliRegisterSubobjects(const LargeBlock* self, MemoryEstimator& memory_estimator) { memory_estimator.RegisterSubobjects(&self->buffer_); } private: void DumpStructure(absl::string_view substr, std::ostream& dest) const; SharedBuffer buffer_; }; class ByteFill::BlockRef { public: BlockRef(const BlockRef& that) = default; BlockRef& operator=(const BlockRef& that) = default; /*implicit*/ operator absl::string_view() const { return absl::string_view(data(), size()); } bool empty() const { return false; } const char* data() const; size_t size() const; // Indicates support for: // * `ExternalRef(BlockRef)` // * `ExternalRef(BlockRef, substr)` friend void RiegeliSupportsExternalRef(const BlockRef*) {} // Supports `ExternalRef`. template friend void RiegeliExternalDelegate(const BlockRef* self, absl::string_view substr, Callback&& delegate_to) { self->ExternalDelegate(substr, std::forward(delegate_to)); } private: friend class ByteFill; // For `BlockRef()`. explicit BlockRef(const ByteFill::Blocks* blocks, Position block_index_complement) : blocks_(blocks), block_index_complement_(block_index_complement) {} template void ExternalDelegate(absl::string_view substr, Callback&& delegate_to) const; const Blocks* blocks_; // `block_index_complement_` is `blocks_->num_blocks_ - block_index`. Working // with the complement makes it easier to handle special case at 1 (a block // with size `blocks_->last_block_size_`). Position block_index_complement_; }; class ByteFill::BlockIterator : public WithCompare { public: using iterator_concept = std::random_access_iterator_tag; // `iterator_category` is only `std::input_iterator_tag` because the // `LegacyForwardIterator` requirement and above require `reference` to be // a true reference type. 
using iterator_category = std::input_iterator_tag; using value_type = BlockRef; using reference = value_type; using pointer = ArrowProxy; using difference_type = ptrdiff_t; BlockIterator() = default; BlockIterator(const BlockIterator& that) = default; BlockIterator& operator=(const BlockIterator& that) = default; reference operator*() const; pointer operator->() const; BlockIterator& operator++(); BlockIterator operator++(int); BlockIterator& operator--(); BlockIterator operator--(int); BlockIterator& operator+=(difference_type n); BlockIterator operator+(difference_type n) const; BlockIterator& operator-=(difference_type n); BlockIterator operator-(difference_type n) const; reference operator[](difference_type n) const; friend bool operator==(BlockIterator a, BlockIterator b) { RIEGELI_ASSERT_EQ(a.blocks_, b.blocks_) << "Failed precondition of operator==(ByteFill::BlockIterator): " "incomparable iterators"; return b.block_index_complement_ == a.block_index_complement_; } friend StrongOrdering RIEGELI_COMPARE(BlockIterator a, BlockIterator b) { RIEGELI_ASSERT_EQ(a.blocks_, b.blocks_) << "Failed precondition of operator<=>(ByteFill::BlockIterator): " "incomparable iterators"; return riegeli::Compare(b.block_index_complement_, a.block_index_complement_); } friend difference_type operator-(BlockIterator a, BlockIterator b) { RIEGELI_ASSERT_EQ(a.blocks_, b.blocks_) << "Failed precondition of operator-(ByteFill::BlockIterator): " "incomparable iterators"; return b.block_index_complement_ - a.block_index_complement_; } friend BlockIterator operator+(difference_type n, BlockIterator a) { return a + n; } private: friend class ByteFill; // For `BlockIterator()`. explicit BlockIterator(const Blocks* blocks, Position block_index_complement) : blocks_(blocks), block_index_complement_(block_index_complement) {} const Blocks* blocks_ = nullptr; // `block_index_complement_` is `blocks_->num_blocks_ - block_index`. 
Working // with the complement makes it easier to handle special cases at 0 (`end()`) // and 1 (a block with size `blocks_->last_block_size_`). Position block_index_complement_ = 0; }; class ByteFill::Blocks { public: using value_type = BlockRef; using reference = value_type; using const_reference = reference; using iterator = BlockIterator; using const_iterator = iterator; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = reverse_iterator; using size_type = size_t; using difference_type = ptrdiff_t; Blocks() = default; Blocks(Blocks&& that) noexcept; Blocks& operator=(Blocks&&) = delete; iterator begin() const { return iterator(this, num_blocks_); } iterator cbegin() const { return begin(); } iterator end() const { return iterator(this, 0); } iterator cend() const { return end(); } reverse_iterator rbegin() const { return reverse_iterator(end()); } reverse_iterator crbegin() const { return rbegin(); } reverse_iterator rend() const { return reverse_iterator(begin()); } reverse_iterator crend() const { return rend(); } bool empty() const { return size() == 0; } size_type size() const { return num_blocks_; } reference operator[](size_type n) const { RIEGELI_ASSERT_LT(n, size()) << "Failed precondition of ByteFill::Blocks::operator[]: " "block index out of range"; return BlockRef(this, num_blocks_ - n); } reference at(size_type n) const { RIEGELI_CHECK_LT(n, size()) << "Failed precondition of ByteFill::Blocks::at(): " "block index out of range"; return BlockRef(this, num_blocks_ - n); } reference front() const { RIEGELI_ASSERT(!empty()) << "Failed precondition of ByteFill::Blocks::front(): no blocks"; return BlockRef(this, num_blocks_); } reference back() const { RIEGELI_ASSERT(!empty()) << "Failed precondition of ByteFill::Blocks::back(): no blocks"; return BlockRef(this, 1); } private: // For `kMaxSizeForSingleBlock`, `Blocks()`, `data()`, `size()`, and // `ExternalDelegate()`. 
friend class ByteFill; // Find a balance between the number of blocks and the block size. // The following parameters yield: // * 1K = 1 * 1K // * 2K = 1 * 2K // * 4K = 2 * 2K // * 8K = 2 * 4K // * 16K = 4 * 4K // * 32K = 4 * 8K // * 64K = 8 * 8K // * 128K = 8 * 16K // * 256K = 16 * 16K // * 512K = 16 * 32K // * 1M = 32 * 32K // * 2M = 32 * 64K // * 4M = 64 * 64K // * 8M = 128 * 64K static constexpr int kBlockSizeBitsBias = 10; static constexpr Position kMaxSizeForSingleBlock = Position{1} << (kBlockSizeBitsBias + 1); explicit Blocks(Position size, char fill); const char* data() const { return data_; } size_t size(Position block_index_complement) const { return block_index_complement == 1 ? last_block_size_ : non_last_block_size_; } template void ExternalDelegate(absl::string_view substr, Callback&& delegate_to) const; Position num_blocks_ = 0; uint32_t non_last_block_size_ = 0; uint32_t last_block_size_ = 0; // If `num_blocks_ > 0` then `data_` is: // * When `block_` is `ZeroBlock`: `ZeroBlock::Data()` // * When `block_` is `SmallBlock`: `small_block.data()` // * When `block_` is `LargeBlock`: `large_block.data()` const char* data_ = nullptr; std::variant block_; }; // Implementation details follow. 
inline const char* ByteFill::BlockRef::data() const { return blocks_->data(); } inline size_t ByteFill::BlockRef::size() const { return blocks_->size(block_index_complement_); } template inline void ByteFill::BlockRef::ExternalDelegate(absl::string_view substr, Callback&& delegate_to) const { blocks_->ExternalDelegate(substr, std::forward(delegate_to)); } inline ByteFill::BlockIterator::reference ByteFill::BlockIterator::operator*() const { RIEGELI_ASSERT_GT(block_index_complement_, 0u) << "Failed precondition of ByteFill::BlockIterator::operator*: " "iterator is end()"; return BlockRef(blocks_, block_index_complement_); } inline ByteFill::BlockIterator::pointer ByteFill::BlockIterator::operator->() const { return pointer(**this); } inline ByteFill::BlockIterator& ByteFill::BlockIterator::operator++() { RIEGELI_ASSERT_GT(block_index_complement_, 0u) << "Failed precondition of ByteFill::BlockIterator::operator++: " "iterator is end()"; --block_index_complement_; return *this; } inline ByteFill::BlockIterator ByteFill::BlockIterator::operator++(int) { const BlockIterator tmp = *this; ++*this; return tmp; } inline ByteFill::BlockIterator& ByteFill::BlockIterator::operator--() { RIEGELI_ASSERT_LT(block_index_complement_, blocks_->size()) << "Failed precondition of ByteFill::BlockIterator::operator--: " "iterator is begin()"; ++block_index_complement_; return *this; } inline ByteFill::BlockIterator ByteFill::BlockIterator::operator--(int) { const BlockIterator tmp = *this; --*this; return tmp; } inline ByteFill::BlockIterator& ByteFill::BlockIterator::operator+=( difference_type n) { if (n >= 0) { RIEGELI_ASSERT_LE(UnsignedCast(n), block_index_complement_) << "Failed precondition of ByteFill::BlockIterator::operator+=: " "iterator after end()"; } else { RIEGELI_ASSERT_LE(NegatingUnsignedCast(n), blocks_->size() - block_index_complement_) << "Failed precondition of ByteFill::BlockIterator::operator+=: " "iterator before begin()"; } block_index_complement_ -= 
static_cast(n); return *this; } inline ByteFill::BlockIterator ByteFill::BlockIterator::operator+( difference_type n) const { return BlockIterator(*this) += n; } inline ByteFill::BlockIterator& ByteFill::BlockIterator::operator-=( difference_type n) { if (n >= 0) { RIEGELI_ASSERT_LE(UnsignedCast(n), blocks_->size() - block_index_complement_) << "Failed precondition of ByteFill::BlockIterator::operator-=: " "iterator before begin()"; } else { RIEGELI_ASSERT_LE(NegatingUnsignedCast(n), block_index_complement_) << "Failed precondition of ByteFill::BlockIterator::operator-=: " "iterator after end()"; } block_index_complement_ += static_cast(n); return *this; } inline ByteFill::BlockIterator ByteFill::BlockIterator::operator-( difference_type n) const { return BlockIterator(*this) -= n; } inline ByteFill::BlockIterator::reference ByteFill::BlockIterator::operator[]( difference_type n) const { return *(*this + n); } inline ByteFill::Blocks::Blocks(Blocks&& that) noexcept : num_blocks_(std::exchange(that.num_blocks_, 0)), last_block_size_(that.last_block_size_), data_(that.data_), block_(std::move(that.block_)) { if (SmallBlock* const small_block = std::get_if(&block_)) { data_ = small_block->data(); } } template inline void ByteFill::Blocks::ExternalDelegate(absl::string_view substr, Callback&& delegate_to) const { struct Visitor { void operator()(const ZeroBlock& zero_ref) const { std::forward(delegate_to)(zero_ref, substr); } void operator()(const SmallBlock& small_block) const { std::forward(delegate_to)(small_block, substr); } void operator()(const LargeBlock& large_block) const { std::forward(delegate_to)(large_block, substr); } absl::string_view substr; Callback&& delegate_to; }; std::visit(Visitor{substr, std::forward(delegate_to)}, block_); } inline ByteFill::Blocks ByteFill::blocks() const { return Blocks(size_, fill_); } } // namespace riegeli #endif // RIEGELI_BASE_BYTE_FILL_H_ ================================================ FILE: riegeli/base/bytes_ref.h 
================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_BYTES_REF_H_ #define RIEGELI_BASE_BYTES_REF_H_ #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/compare.h" #include "riegeli/base/initializer.h" #include "riegeli/base/maker.h" #include "riegeli/base/string_ref.h" #include "riegeli/base/temporary_storage.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `BytesRef` stores an `absl::string_view` representing text or binary data // (see `StringRef` for text data), possibly converted from // `absl::Span` or temporary `std::string`. // // It is intended for function parameters when the implementation needs // an `absl::string_view`, and the caller might have another representation // of the string. // // It is convertible from: // * types convertible to `absl::string_view` // * types convertible to `std::string`, e.g. `BytesInitializer` // * types convertible to `absl::Span`, // e.g. `std::vector` or `std::array`. // // `BytesRef` does not own string contents and is efficiently copyable. class BytesRef : public StringRef, public WithCompare { public: // Stores an empty `absl::string_view`. BytesRef() = default; // Stores `str` converted to `absl::string_view`. 
ABSL_ATTRIBUTE_ALWAYS_INLINE /*implicit*/ BytesRef(const char* str ABSL_ATTRIBUTE_LIFETIME_BOUND) : StringRef(absl::string_view(str)) {} // Stores `str` converted to `StringRef` and then to `absl::string_view`. template , std::is_convertible>, int> = 0> /*implicit*/ BytesRef(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND) : StringRef(std::forward(str)) {} // Stores `str` converted to `absl::string_view`. /*implicit*/ BytesRef( absl::Span str ABSL_ATTRIBUTE_LIFETIME_BOUND) : StringRef(absl::string_view(str.data(), str.size())) {} // Stores `str` materialized, then converted to `StringRef` and then to // `absl::string_view`. template , std::negation>, std::is_convertible>, int> = 0> /*implicit*/ BytesRef(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND, TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : StringRef(std::forward(str), std::move(storage)) {} // Stores `str` converted to `absl::Span` and then to // `absl::string_view`. template < typename T, std::enable_if_t< std::conjunction_v, std::negation>, NotSameRef, T>, std::is_convertible>>, int> = 0> /*implicit*/ BytesRef(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND) : BytesRef(absl::Span(std::forward(str))) {} BytesRef(const BytesRef& that) = default; BytesRef& operator=(const BytesRef&) = delete; friend bool operator==(BytesRef a, BytesRef b) { return absl::string_view(a) == absl::string_view(b); } friend riegeli::StrongOrdering RIEGELI_COMPARE(BytesRef a, BytesRef b) { return riegeli::Compare(absl::string_view(a), absl::string_view(b)); } template < typename T, std::enable_if_t, std::is_convertible>, int> = 0> friend bool operator==(BytesRef a, T&& b) { return a == BytesRef(std::forward(b)); } template < typename T, std::enable_if_t, std::is_convertible>, int> = 0> friend riegeli::StrongOrdering RIEGELI_COMPARE(BytesRef a, T&& b) { return riegeli::Compare(a, BytesRef(std::forward(b))); } // `absl::Span` is already comparable against types convertible to // `absl::Span`, which includes `BytesRef`. 
}; // `BytesInitializer` is convertible from the same types as `BytesRef`, // but efficiently takes ownership of `std::string`. // // `BytesInitializer` behaves like `Initializer`. class BytesInitializer : public Initializer { public: BytesInitializer() = default; // Stores `str` converted to `absl::string_view` and then to `std::string`. ABSL_ATTRIBUTE_ALWAYS_INLINE /*implicit*/ BytesInitializer(const char* str ABSL_ATTRIBUTE_LIFETIME_BOUND, TemporaryStorage>&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : Initializer(std::move(storage).emplace(absl::string_view(str))) {} // Stores `str` converted to `std::string`. template , std::is_convertible>, int> = 0> /*implicit*/ BytesInitializer(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND) : Initializer(std::forward(str)) {} // Stores `str` converted to `BytesRef`, then to `absl::string_view`, and then // to `std::string`. template < typename T, std::enable_if_t, std::negation>, std::is_convertible>, int> = 0> /*implicit*/ BytesInitializer(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND, TemporaryStorage>&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : Initializer( std::move(storage).emplace(BytesRef(std::forward(str)))) {} BytesInitializer(BytesInitializer&& that) = default; BytesInitializer& operator=(BytesInitializer&&) = delete; }; } // namespace riegeli #endif // RIEGELI_BASE_BYTES_REF_H_ ================================================ FILE: riegeli/base/c_string_ref.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_C_STRING_REF_H_ #define RIEGELI_BASE_C_STRING_REF_H_ #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/strings/string_view.h" #include "riegeli/base/compare.h" #include "riegeli/base/string_ref.h" #include "riegeli/base/temporary_storage.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `CStringRef` stores a pointer to a C-style NUL-terminated string // or `nullptr`, possibly converted from another string representation. // // It is intended for function parameters when the implementation needs // a C-style NUL-terminated string, and the caller might have another // representation of the string. // // It is convertible from: // * `std::nullptr_t` // * types convertible to `const char*` // * types supporting `c_str()`, e.g. `std::string` or mutable `CompactString` // * types convertible to `absl::string_view` // // It copies string contents when this is needed for NUL-termination, // e.g. for types convertible to `absl::string_view` excluding `std::string` // and mutable `CompactString`. In that case the string is stored in a storage // object passed as a default argument to the constructor. // // `CStringRef` does not own string contents and is efficiently copyable. class ABSL_NULLABILITY_COMPATIBLE CStringRef : public WithEqual { private: template struct HasCStr : std::false_type {}; template struct HasCStr().c_str()), const char*>>> : std::true_type {}; public: // Stores `nullptr`. CStringRef() = default; /*implicit*/ CStringRef(std::nullptr_t) {} // Stores `str`. /*implicit*/ CStringRef( const char* absl_nullable str ABSL_ATTRIBUTE_LIFETIME_BOUND) : c_str_(str) {} // Stores `str` converted to `const char*`. 
template , NotSameRef, NotSameRef, std::is_convertible>, int> = 0> /*implicit*/ CStringRef(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND) : c_str_(std::forward(str)) {} // Stores `str.c_str()`. This applies e.g. to `std::string` and // mutable `CompactString`. template , NotSameRef, std::negation>, HasCStr>, int> = 0> /*implicit*/ CStringRef(T&& str ABSL_ATTRIBUTE_LIFETIME_BOUND) : c_str_(std::forward(str).c_str()) {} // Stores a pointer to the first character of a NUL-terminated copy of `str` // converted to `StringRef` and then to `absl::string_view`. // // The string is stored in a storage object passed as a default argument to // this constructor. template < typename T, std::enable_if_t< std::conjunction_v< NotSameRef, NotSameRef, std::negation>, std::negation>, std::is_convertible>, int> = 0> /*implicit*/ CStringRef(T&& str, TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : CStringRef(std::move(storage).emplace( absl::string_view(StringRef(std::forward(str))))) {} CStringRef(const CStringRef& that) = default; CStringRef& operator=(const CStringRef&) = delete; // Returns the pointer to the C-style NUL-terminated string, or `nullptr`. const char* absl_nullable c_str() const { return c_str_; } friend bool operator==(CStringRef a, std::nullptr_t) { return a.c_str_ == nullptr; } private: using pointer = const char*; // For `ABSL_NULLABILITY_COMPATIBLE`. const char* absl_nullable c_str_ = nullptr; }; } // namespace riegeli #endif // RIEGELI_BASE_C_STRING_REF_H_ ================================================ FILE: riegeli/base/chain.cc ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/container/inlined_vector.h" #include "absl/strings/cord.h" #include "absl/strings/resize_and_overwrite.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/buffering.h" #include "riegeli/base/bytes_ref.h" #include "riegeli/base/chain_base.h" #include "riegeli/base/chain_details.h" #include "riegeli/base/compare.h" #include "riegeli/base/external_ref_base.h" #include "riegeli/base/initializer.h" #include "riegeli/base/intrusive_shared_ptr.h" #include "riegeli/base/invoker.h" #include "riegeli/base/maker.h" #include "riegeli/base/memory_estimator.h" #include "riegeli/base/new_aligned.h" #include "riegeli/base/null_safe_memcpy.h" #include "riegeli/base/ownership.h" #include "riegeli/base/stream_utils.h" #include "riegeli/base/string_utils.h" namespace riegeli { namespace { // Stores an `absl::Cord` which must be flat, i.e. // `src.TryFlat() != std::nullopt`. // // This design relies on the fact that moving a flat `absl::Cord` results in a // flat `absl::Cord`. class FlatCordBlock { public: explicit FlatCordBlock(Initializer src); FlatCordBlock(FlatCordBlock&& that) = default; FlatCordBlock& operator=(FlatCordBlock&& that) = default; const absl::Cord& src() const { return src_; } /*implicit*/ operator absl::string_view() const; // Supports `ExternalRef` and `Chain::Block`. 
friend void RiegeliDumpStructure( ABSL_ATTRIBUTE_UNUSED const FlatCordBlock* self, std::ostream& dest) { dest << "[cord] { }"; } // Supports `MemoryEstimator`. template friend void RiegeliRegisterSubobjects(const FlatCordBlock* self, MemoryEstimator& memory_estimator) { memory_estimator.RegisterSubobjects(&self->src_); } private: // Invariant: `src_.TryFlat() != std::nullopt` absl::Cord src_; }; inline FlatCordBlock::FlatCordBlock(Initializer src) : src_(std::move(src)) { RIEGELI_ASSERT(src_.TryFlat() != std::nullopt) << "Failed precondition of FlatCordBlock::FlatCordBlock(): " "Cord is not flat"; } inline FlatCordBlock::operator absl::string_view() const { if (const std::optional flat = src_.TryFlat(); flat != std::nullopt) { return *flat; } RIEGELI_ASSUME_UNREACHABLE() << "Failed invariant of FlatCordBlock: Cord is not flat"; } } // namespace namespace chain_internal { void DumpStructureDefault(std::ostream& dest) { dest << "[external] { }"; } } // namespace chain_internal void RiegeliDumpStructure(const std::string* self, std::ostream& dest) { dest << "[string] { capacity: " << self->capacity() << " }"; } inline IntrusiveSharedPtr Chain::RawBlock::NewInternal( size_t min_capacity) { RIEGELI_ASSERT_GT(min_capacity, 0u) << "Failed precondition of Chain::RawBlock::NewInternal(): zero capacity"; size_t raw_capacity; return IntrusiveSharedPtr(SizeReturningNewAligned( kInternalAllocatedOffset() + min_capacity, &raw_capacity, &raw_capacity)); } inline Chain::RawBlock::RawBlock(const size_t* raw_capacity) : substr_(allocated_begin_, 0), // Redundant cast is needed for `-fsanitize=bounds`. 
allocated_end_(static_cast(allocated_begin_) + (*raw_capacity - kInternalAllocatedOffset())) { RIEGELI_ASSERT(is_internal()) << "A RawBlock with allocated_end_ != nullptr " "should be considered internal"; RIEGELI_ASSERT_LE(capacity(), RawBlock::kMaxCapacity) << "Chain block capacity overflow"; } inline IntrusiveSharedPtr Chain::RawBlock::Copy() { IntrusiveSharedPtr block = NewInternal(size()); block->Append(*this); RIEGELI_ASSERT(!block->wasteful()) << "A full block should not be considered wasteful"; return block; } inline size_t Chain::RawBlock::space_before() const { RIEGELI_ASSERT(is_internal()) << "Failed precondition of Chain::RawBlock::space_before(): " "block not internal"; return PtrDistance(allocated_begin_, data_begin()); } inline size_t Chain::RawBlock::space_after() const { RIEGELI_ASSERT(is_internal()) << "Failed precondition of Chain::RawBlock::space_after(): " "block not internal"; return PtrDistance(data_end(), allocated_end_); } inline bool Chain::RawBlock::tiny(size_t extra_size) const { if (is_internal()) { RIEGELI_ASSERT_LE(size(), capacity()) << "Failed invariant of Chain::RawBlock: size greater than capacity"; RIEGELI_ASSERT_LE(extra_size, capacity() - size()) << "Failed precondition of Chain::RawBlock::tiny(): " "extra size greater than remaining space"; } else { RIEGELI_ASSERT_EQ(extra_size, 0u) << "Failed precondition of Chain::RawBlock::tiny(): " "non-zero extra size of external block"; } return size() + extra_size < kDefaultMinBlockSize; } inline bool Chain::RawBlock::wasteful(size_t extra_size) const { if (is_internal()) { RIEGELI_ASSERT_LE(size(), capacity()) << "Failed invariant of Chain::RawBlock: size greater than capacity"; RIEGELI_ASSERT_LE(extra_size, capacity() - size()) << "Failed precondition of Chain::RawBlock::wasteful(): " "extra size greater than remaining space"; } else { RIEGELI_ASSERT_EQ(extra_size, 0u) << "Failed precondition of Chain::RawBlock::wasteful(): " "non-zero extra size of external block"; return false; } 
return Wasteful(kInternalAllocatedOffset() + capacity(), size() + extra_size); } inline void Chain::RawBlock::DumpStructure(std::ostream& dest) const { dest << "block {"; const size_t ref_count = ref_count_.GetCount(); if (ref_count != 1) dest << " ref_count: " << ref_count; dest << " size: " << size(); if (is_internal()) { if (space_before() > 0) dest << " space_before: " << space_before(); dest << " space_after: " << space_after(); } else { dest << " "; external_.methods->dump_structure(*this, dest); } dest << " }"; } size_t Chain::RawBlock::DynamicSizeOf() const { if (is_internal()) { return kInternalAllocatedOffset() + capacity(); } else { return external_.methods->dynamic_sizeof; } } void Chain::RawBlock::RegisterSubobjects( MemoryEstimator& memory_estimator) const { if (!is_internal()) { external_.methods->register_subobjects(this, memory_estimator); } } inline bool Chain::RawBlock::can_append(size_t length) const { return is_mutable() && (empty() ? capacity() : space_after()) >= length; } inline bool Chain::RawBlock::can_prepend(size_t length) const { return is_mutable() && (empty() ? 
capacity() : space_before()) >= length; } inline absl::Span Chain::RawBlock::AppendBuffer(size_t max_length) ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(is_mutable()) << "Failed precondition of Chain::RawBlock::AppendBuffer(): " "block is immutable"; if (empty()) substr_ = absl::string_view(allocated_begin_, 0); const size_t length = UnsignedMin(space_after(), max_length); const absl::Span buffer(const_cast(data_end()), length); substr_ = absl::string_view(data_begin(), size() + length); return buffer; } inline absl::Span Chain::RawBlock::PrependBuffer(size_t max_length) ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(is_mutable()) << "Failed precondition of Chain::RawBlock::PrependBuffer(): " "block is immutable"; if (empty()) substr_ = absl::string_view(allocated_end_, 0); const size_t length = UnsignedMin(space_before(), max_length); const absl::Span buffer(const_cast(data_begin()) - length, length); substr_ = absl::string_view(data_begin() - length, size() + length); return buffer; } inline void Chain::RawBlock::Append(absl::string_view src, size_t space_before) { if (empty()) { // Redundant cast is needed for `-fsanitize=bounds`. 
substr_ = absl::string_view( static_cast(allocated_begin_) + space_before, 0); } AppendWithExplicitSizeToCopy(src, src.size()); } inline void Chain::RawBlock::AppendWithExplicitSizeToCopy(absl::string_view src, size_t size_to_copy) { RIEGELI_ASSERT_GE(size_to_copy, src.size()) << "Failed precondition of " "Chain::RawBlock::AppendWithExplicitSizeToCopy(): " "size to copy too small"; RIEGELI_ASSERT(can_append(size_to_copy)) << "Failed precondition of " "Chain::RawBlock::AppendWithExplicitSizeToCopy(): " "not enough space"; std::memcpy(const_cast(data_end()), src.data(), size_to_copy); substr_ = absl::string_view(data_begin(), size() + src.size()); } inline void Chain::RawBlock::Prepend(absl::string_view src, size_t space_after) { RIEGELI_ASSERT(can_prepend(src.size())) << "Failed precondition of Chain::RawBlock::Prepend(): " "not enough space"; if (empty()) substr_ = absl::string_view(allocated_end_ - space_after, 0); std::memcpy(const_cast(data_begin() - src.size()), src.data(), src.size()); substr_ = absl::string_view(data_begin() - src.size(), size() + src.size()); } size_t Chain::BlockIterator::CharIndexInChainInternal() const { if (ptr_ == kBeginShortData) { return 0; } else if (ptr_ == kEndShortData || ptr_ == BlockPtrPtr::from_ptr(chain_->end_)) { return chain_->size(); } else if (chain_->has_here()) { switch (block_index()) { case 0: return 0; case 1: return chain_->begin_[0].block_ptr->size(); default: RIEGELI_ASSUME_UNREACHABLE() << "Failed invariant of Chain: " "only two block pointers fit without allocating their array"; } } else { const size_t offset_base = chain_->begin_[chain_->block_offsets()].block_offset; return ptr_.as_ptr()[chain_->block_offsets()].block_offset - offset_base; } } Chain::Block Chain::Block::ToChainBlock(absl::string_view substr) && { if (substr.size() == block_->size()) return std::move(*this); return Block(std::move(*this), substr); } absl::Cord Chain::Block::ToCord(absl::string_view substr) && { if (const FlatCordBlock* const 
cord_ptr = block_->checked_external_object()) { if (substr.size() == cord_ptr->src().size()) return cord_ptr->src(); return cord_ptr->src().Subcord( PtrDistance(absl::string_view(*cord_ptr).data(), substr.data()), substr.size()); } return absl::MakeCordFromExternal(substr, [block = std::move(block_)] {}); } absl::Cord Chain::Block::ToCord(absl::string_view substr) const& { if (const FlatCordBlock* const cord_ptr = block_->checked_external_object()) { if (substr.size() == cord_ptr->src().size()) return cord_ptr->src(); return cord_ptr->src().Subcord( PtrDistance(absl::string_view(*cord_ptr).data(), substr.data()), substr.size()); } return absl::MakeCordFromExternal(substr, [block = block_] {}); } void Chain::Block::DumpStructure(absl::string_view substr, std::ostream& dest) const { dest << "[block] { offset: " << PtrDistance(block_->data_begin(), substr.data()) << " "; block_->DumpStructure(dest); dest << " }"; } Chain::Chain(const absl::Cord& src) { Initialize(src); } Chain::Chain(absl::Cord&& src) { Initialize(std::move(src)); } Chain::Chain(const Chain& that) { Initialize(that); } Chain& Chain::operator=(const Chain& that) { if (ABSL_PREDICT_TRUE(&that != this)) { UnrefBlocks(); Initialize(that); } return *this; } bool Chain::ClearSlow() { RIEGELI_ASSERT_NE(begin_, end_) << "Failed precondition of Chain::ClearSlow(): " "no blocks, use Clear() instead"; const bool block_remains = front()->TryClear(); BlockPtr* const new_end = begin_ + (block_remains ? 
1 : 0); UnrefBlocks(new_end, end_); end_ = new_end; return block_remains; } void Chain::Reset(BytesRef src) { size_ = 0; if (begin_ != end_ && ClearSlow()) { Append(src, Options().set_size_hint(src.size())); return; } Initialize(src); } void Chain::Reset(Block src) { size_ = 0; UnrefBlocks(); end_ = begin_; if (src.raw_block() != nullptr) Initialize(std::move(src)); } void Chain::Reset(const absl::Cord& src) { size_ = 0; if (begin_ != end_ && ClearSlow()) { Append(src, Options().set_size_hint(src.size())); return; } Initialize(src); } void Chain::Reset(absl::Cord&& src) { size_ = 0; if (begin_ != end_ && ClearSlow()) { const size_t size = src.size(); Append(std::move(src), Options().set_size_hint(size)); return; } Initialize(std::move(src)); } void Chain::InitializeSlow(absl::string_view src) { RIEGELI_ASSERT_GT(src.size(), kMaxShortDataSize) << "Failed precondition of Chain::InitializeSlow(string_view): " "string too short, use Initialize() instead"; IntrusiveSharedPtr block = RawBlock::NewInternal(UnsignedMin(src.size(), kDefaultMaxBlockSize)); const absl::Span buffer = block->AppendBuffer(src.size()); std::memcpy(buffer.data(), src.data(), buffer.size()); Initialize(Block(std::move(block))); Options options; options.set_size_hint(src.size()); src.remove_prefix(buffer.size()); Append(src, options); } inline void Chain::Initialize(const absl::Cord& src) { RIEGELI_ASSERT_EQ(size_, 0u) << "Failed precondition of Chain::Initialize(const Cord&): " "size not reset"; InitializeFromCord(src); } inline void Chain::Initialize(absl::Cord&& src) { RIEGELI_ASSERT_EQ(size_, 0u) << "Failed precondition of Chain::Initialize(absl::Cord&&): " "size not reset"; InitializeFromCord(std::move(src)); } template inline void Chain::InitializeFromCord(CordRef&& src) { if (const std::optional flat = src.TryFlat(); flat != std::nullopt) { if (flat->size() <= kMaxBytesToCopyToEmpty) { Initialize(*flat); } else { Initialize( Block(riegeli::Maker(std::forward(src)))); } return; } 
AppendCordSlow(std::forward(src), Options().set_size_hint(src.size())); } inline void Chain::Initialize(const Chain& src) { size_ = src.size_; end_ = begin_; if (src.begin_ == src.end_) { EnsureHasHere(); std::memcpy(short_data_begin(), src.short_data_begin(), kMaxShortDataSize); } else { AppendBlocks(src.begin_, src.end_); } } inline std::string Chain::ToString() const { if (begin_ == end_) return std::string(short_data()); std::string dest; absl::StringResizeAndOverwrite(dest, size_, [&](char* data, size_t size) { CopyToSlow(data); return size; }); return dest; } absl::string_view Chain::FlattenSlow() { RIEGELI_ASSERT_GT(end_ - begin_, 1) << "Failed precondition of Chain::FlattenSlow(): " "contents already flat, use Flatten() instead"; if (front()->empty()) { PopFront(); if (end_ - begin_ == 1) return *front(); } if (back()->empty()) { PopBack(); if (end_ - begin_ == 1) return *back(); } IntrusiveSharedPtr block = RawBlock::NewInternal(NewBlockCapacity(0, size_, size_, Options())); const BlockPtr* iter = begin_; do { block->Append(*iter->block_ptr); ++iter; } while (iter != end_); UnrefBlocks(begin_, end_); end_ = begin_; PushBack(std::move(block)); return *back(); } inline Chain::BlockPtr* Chain::NewBlockPtrs(size_t capacity) { return std::allocator().allocate(2 * capacity); } void Chain::UnrefBlocksSlow(const BlockPtr* begin, const BlockPtr* end) { RIEGELI_ASSERT_LT(begin, end) << "Failed precondition of Chain::UnrefBlocksSlow(): " "no blocks, use UnrefBlocks() instead"; do { (begin++)->block_ptr->Unref(); } while (begin != end); } inline void Chain::DropPassedBlocks(PassOwnership) { size_ = 0; end_ = begin_; } inline void Chain::DropPassedBlocks(ShareOwnership) const {} void Chain::CopyTo(char* dest) const { if (begin_ == end_) { riegeli::null_safe_memcpy(dest, short_data_begin(), size_); return; } CopyToSlow(dest); } inline void Chain::CopyToSlow(char* dest) const { RIEGELI_ASSERT_NE(begin_, end_) << "Failed precondition of Chain::CopyToSlow(): " "no blocks, 
use CopyTo() instead"; const BlockPtr* iter = begin_; do { std::memcpy(dest, iter->block_ptr->data_begin(), iter->block_ptr->size()); dest += iter->block_ptr->size(); ++iter; } while (iter != end_); } void Chain::AppendTo(std::string& dest) const& { const size_t old_size = dest.size(); RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - old_size) << "Failed precondition of Chain::AppendTo(string&): " "string size overflow"; riegeli::StringResizeAndOverwriteAmortized(dest, old_size + size_, [&](char* data, size_t size) { CopyTo(data + old_size); return size; }); } void Chain::AppendTo(std::string& dest) && { if (dest.empty() && PtrDistance(begin_, end_) == 1) { if (std::string* const string_ptr = back()->checked_external_object_with_unique_owner()) { RIEGELI_ASSERT_EQ(back()->size(), string_ptr->size()) << "Failed invariant of Chain::RawBlock: " "block size differs from string size"; if (dest.capacity() <= string_ptr->capacity()) { dest = std::move(*string_ptr); size_ = 0; PopBack(); return; } } } const size_t old_size = dest.size(); RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - old_size) << "Failed precondition of Chain::AppendTo(string&): " "string size overflow"; riegeli::StringResizeAndOverwriteAmortized(dest, old_size + size_, [&](char* data, size_t size) { CopyTo(data + old_size); return size; }); } void Chain::AppendTo(absl::Cord& dest) const& { RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - dest.size()) << "Failed precondition of Chain::AppendTo(Cord&): Cord size overflow"; if (begin_ == end_) { dest.Append(short_data()); return; } AppendToSlow(dest); } void Chain::AppendTo(absl::Cord& dest) && { RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - dest.size()) << "Failed precondition of Chain::AppendTo(Cord&): Cord size overflow"; if (begin_ == end_) { dest.Append(short_data()); return; } std::move(*this).AppendToSlow(dest); } inline void Chain::AppendToSlow(absl::Cord& dest) const& { RIEGELI_ASSERT_NE(begin_, end_) << "Failed precondition of 
Chain::AppendToSlow(Cord&): " "no blocks, use AppendTo() instead"; const BlockPtr* iter = begin_; do { ExternalRef(riegeli::Invoker(MakeBlock(), iter->block_ptr), *iter->block_ptr) .AppendTo(dest); ++iter; } while (iter != end_); } inline void Chain::AppendToSlow(absl::Cord& dest) && { RIEGELI_ASSERT_NE(begin_, end_) << "Failed precondition of Chain::AppendToSlow(Cord&): " "no blocks, use AppendTo() instead"; size_ = 0; const BlockPtr* iter = begin_; do { ExternalRef(riegeli::Invoker(MakeBlock(), IntrusiveSharedPtr(iter->block_ptr)), *iter->block_ptr) .AppendTo(dest); ++iter; } while (iter != end_); end_ = begin_; } void Chain::PrependTo(absl::Cord& dest) const& { RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - dest.size()) << "Failed precondition of Chain::PrependTo(Cord&): Cord size overflow"; if (begin_ == end_) { dest.Prepend(short_data()); return; } PrependToSlow(dest); } void Chain::PrependTo(absl::Cord& dest) && { RIEGELI_CHECK_LE(size_, std::numeric_limits::max() - dest.size()) << "Failed precondition of Chain::PrependTo(Cord&): Cord size overflow"; if (begin_ == end_) { dest.Prepend(short_data()); return; } std::move(*this).PrependToSlow(dest); } inline void Chain::PrependToSlow(absl::Cord& dest) const& { RIEGELI_ASSERT_NE(end_, begin_) << "Failed precondition of Chain::PrependToSlow(Cord&): " "no blocks, use PrependTo() instead"; const BlockPtr* iter = end_; do { --iter; ExternalRef(riegeli::Invoker(MakeBlock(), iter->block_ptr), *iter->block_ptr) .PrependTo(dest); } while (iter != begin_); } inline void Chain::PrependToSlow(absl::Cord& dest) && { RIEGELI_ASSERT_NE(end_, begin_) << "Failed precondition of Chain::PrependToSlow(Cord&): " "no blocks, use PrependTo() instead"; const BlockPtr* iter = end_; size_ = 0; do { --iter; ExternalRef(riegeli::Invoker(MakeBlock(), IntrusiveSharedPtr(iter->block_ptr)), *iter->block_ptr) .PrependTo(dest); } while (iter != begin_); end_ = begin_; } Chain::operator std::string() const& { return ToString(); } 
Chain::operator std::string() && { if (PtrDistance(begin_, end_) == 1) { if (std::string* const string_ptr = back()->checked_external_object_with_unique_owner()) { RIEGELI_ASSERT_EQ(back()->size(), string_ptr->size()) << "Failed invariant of Chain::RawBlock: " "block size differs from string size"; const std::string dest = std::move(*string_ptr); size_ = 0; PopBack(); return dest; } } return ToString(); } Chain::operator absl::Cord() const& { if (begin_ == end_) return absl::Cord(short_data()); absl::Cord dest; AppendToSlow(dest); return dest; } Chain::operator absl::Cord() && { if (begin_ == end_) return absl::Cord(short_data()); absl::Cord dest; std::move(*this).AppendToSlow(dest); return dest; } Chain::BlockAndChar Chain::BlockAndCharIndex(size_t char_index_in_chain) const { RIEGELI_ASSERT_LE(char_index_in_chain, size()) << "Failed precondition of Chain::BlockAndCharIndex(): " "position out of range"; if (char_index_in_chain == size()) { return BlockAndChar{blocks().cend(), 0}; } else if (begin_ == end_) { return BlockAndChar{blocks().cbegin(), char_index_in_chain}; } else if (has_here()) { BlockIterator block_iter = blocks().cbegin(); if (char_index_in_chain >= block_iter->size()) { char_index_in_chain -= block_iter->size(); ++block_iter; RIEGELI_ASSERT_LT(char_index_in_chain, block_iter->size()) << "Failed invariant of Chain: " "only two block pointers fit without allocating their array"; } return BlockAndChar{block_iter, char_index_in_chain}; } else { const size_t offset_base = begin_[block_offsets()].block_offset; const BlockPtr* const found = std::upper_bound(begin_ + block_offsets() + 1, end_ + block_offsets(), char_index_in_chain, [&](size_t value, BlockPtr element) { return value < element.block_offset - offset_base; }) - 1; return BlockAndChar{ BlockIterator(this, PtrDistance(begin_ + block_offsets(), found)), char_index_in_chain - (found->block_offset - offset_base)}; } } void Chain::DumpStructure(std::ostream& dest) const { dest << "chain {\n size: " 
<< size_ << " memory: " << riegeli::EstimateMemory(*this); for (const BlockPtr* iter = begin_; iter != end_; ++iter) { dest << "\n "; iter->block_ptr->DumpStructure(dest); } dest << "\n}\n"; } void Chain::RegisterSubobjects(MemoryEstimator& memory_estimator) const { if (has_allocated()) { memory_estimator.RegisterMemory( 2 * PtrDistance(block_ptrs_.allocated.begin, block_ptrs_.allocated.end) * sizeof(BlockPtr)); } for (const BlockPtr* iter = begin_; iter != end_; ++iter) { if (memory_estimator.RegisterNode(iter->block_ptr)) { memory_estimator.RegisterDynamicObject(iter->block_ptr); } } } inline IntrusiveSharedPtr Chain::SetBack( IntrusiveSharedPtr block) { return IntrusiveSharedPtr( std::exchange(end_[-1].block_ptr, block.Release())); // There is no need to adjust block offsets because the size of the last block // is not reflected in block offsets. } inline IntrusiveSharedPtr Chain::SetFront( IntrusiveSharedPtr block) { IntrusiveSharedPtr old_block = SetFrontSameSize(std::move(block)); RefreshFront(); return old_block; } inline IntrusiveSharedPtr Chain::SetFrontSameSize( IntrusiveSharedPtr block) { return IntrusiveSharedPtr( std::exchange(begin_[0].block_ptr, block.Release())); } inline void Chain::RefreshFront() { if (has_allocated()) { begin_[block_offsets()].block_offset = begin_ + 1 == end_ ? size_t{0} : begin_[block_offsets() + 1].block_offset - begin_[0].block_ptr->size(); } } inline void Chain::PushBack(IntrusiveSharedPtr block) { ReserveBack(1); end_[0].block_ptr = block.Release(); if (has_allocated()) { end_[block_offsets()].block_offset = begin_ == end_ ? size_t{0} : end_[block_offsets() - 1].block_offset + end_[-1].block_ptr->size(); } ++end_; } inline void Chain::PushFront(IntrusiveSharedPtr block) { ReserveFront(1); BlockPtr* const old_begin = begin_; --begin_; begin_[0].block_ptr = block.Release(); if (has_allocated()) { begin_[block_offsets()].block_offset = old_begin == end_ ? 
size_t{0} : begin_[block_offsets() + 1].block_offset - begin_[0].block_ptr->size(); } } inline IntrusiveSharedPtr Chain::PopBack() { RIEGELI_ASSERT_NE(begin_, end_) << "Failed precondition of Chain::PopBack(): no blocks"; --end_; return IntrusiveSharedPtr(end_[0].block_ptr); } inline IntrusiveSharedPtr Chain::PopFront() { RIEGELI_ASSERT_NE(begin_, end_) << "Failed precondition of Chain::PopFront(): no blocks"; if (has_here()) { // Shift the remaining 0 or 1 block pointers to the left by 1 because // `begin_` must remain at `block_ptrs_.here`. There might be no pointer to // copy; it is more efficient to copy the array slot unconditionally. IntrusiveSharedPtr block( std::exchange(block_ptrs_.here[0], block_ptrs_.here[1]).block_ptr); --end_; return block; } else { ++begin_; return IntrusiveSharedPtr(begin_[-1].block_ptr); } } template inline void Chain::AppendBlocks(const BlockPtr* begin, const BlockPtr* end) { if (begin == end) return; ReserveBack(PtrDistance(begin, end)); BlockPtr* dest_iter = end_; dest_iter->block_ptr = begin->block_ptr->Ref(); if (has_allocated()) { const size_t offsets = block_offsets(); size_t offset = begin_ == end_ ? 
size_t{0} : dest_iter[offsets - 1].block_offset + dest_iter[-1].block_ptr->size(); dest_iter[offsets].block_offset = offset; ++begin; ++dest_iter; while (begin != end) { dest_iter->block_ptr = begin->block_ptr->Ref(); offset += dest_iter[-1].block_ptr->size(); dest_iter[offsets].block_offset = offset; ++begin; ++dest_iter; } } else { ++begin; ++dest_iter; if (begin != end) { dest_iter->block_ptr = begin->block_ptr->Ref(); ++begin; ++dest_iter; RIEGELI_ASSERT_EQ(begin, end) << "Failed invariant of Chain: " "only two block pointers fit without allocating their array"; } } end_ = dest_iter; } template inline void Chain::PrependBlocks(const BlockPtr* begin, const BlockPtr* end) { if (begin == end) return; ReserveFront(PtrDistance(begin, end)); BlockPtr* dest_iter = begin_; BlockPtr* const old_begin = begin_; begin_ -= PtrDistance(begin, end); // For `has_allocated()` to work. --end; --dest_iter; dest_iter->block_ptr = end->block_ptr->Ref(); if (has_allocated()) { const size_t offsets = block_offsets(); size_t offset = old_begin == end_ ? size_t{0} : dest_iter[offsets + 1].block_offset - dest_iter->block_ptr->size(); dest_iter[offsets].block_offset = offset; while (end != begin) { --end; --dest_iter; dest_iter->block_ptr = end->block_ptr->Ref(); offset -= dest_iter->block_ptr->size(); dest_iter[offsets].block_offset = offset; } } else { if (end != begin) { --end; --dest_iter; dest_iter->block_ptr = end->block_ptr->Ref(); RIEGELI_ASSERT_EQ(begin, end) << "Failed invariant of Chain: " "only two block pointers fit without allocating their array"; } } } inline void Chain::ReserveBack(size_t extra_capacity) { BlockPtr* const allocated_end = has_here() ? block_ptrs_.here + 2 : block_ptrs_.allocated.end; if (ABSL_PREDICT_FALSE(extra_capacity > PtrDistance(end_, allocated_end))) { // The slow path is in a separate function to make easier for the compiler // to make good inlining decisions. 
ReserveBackSlow(extra_capacity); } } inline void Chain::ReserveFront(size_t extra_capacity) { BlockPtr* const allocated_begin = has_here() ? block_ptrs_.here : block_ptrs_.allocated.begin; if (ABSL_PREDICT_FALSE(extra_capacity > PtrDistance(allocated_begin, begin_))) { // The slow path is in a separate function to make easier for the compiler // to make good inlining decisions. ReserveFrontSlow(extra_capacity); } } inline void Chain::ReserveBackSlow(size_t extra_capacity) { RIEGELI_ASSERT_GT(extra_capacity, 0u) << "Failed precondition of Chain::ReserveBackSlow(): " "nothing to do, use ReserveBack() instead"; BlockPtr* old_allocated_begin; BlockPtr* old_allocated_end; if (has_here()) { old_allocated_begin = block_ptrs_.here; old_allocated_end = block_ptrs_.here + 2; } else { old_allocated_begin = block_ptrs_.allocated.begin; old_allocated_end = block_ptrs_.allocated.end; } RIEGELI_ASSERT_GT(extra_capacity, PtrDistance(end_, old_allocated_end)) << "Failed precondition of Chain::ReserveBackSlow(): " "extra capacity fits in allocated space, use ReserveBack() instead"; RIEGELI_ASSERT_LE(extra_capacity, std::numeric_limits::max() / (2 * sizeof(BlockPtr)) - PtrDistance(old_allocated_begin, end_)) << "Failed invariant of Chain: array of block pointers overflow, " "possibly blocks are too small"; const size_t old_capacity = PtrDistance(old_allocated_begin, old_allocated_end); const size_t size = PtrDistance(begin_, end_); if (size + extra_capacity <= old_capacity && 2 * size <= old_capacity) { RIEGELI_ASSERT(has_allocated()) << "The case of has_here() if there is space without reallocation " "was handled in ReserveBack()"; // Existing array has enough capacity and is at most half full: move // contents to the beginning of the array. This is enough to make the // amortized cost of adding one element constant as long as prepending // leaves space at both ends. 
BlockPtr* const new_begin = old_allocated_begin; // Moving left, so block pointers must be moved before block offsets. std::memmove(new_begin, begin_, size * sizeof(BlockPtr)); std::memmove(new_begin + old_capacity, begin_ + old_capacity, size * sizeof(BlockPtr)); begin_ = new_begin; end_ = new_begin + size; return; } // Reallocate the array, without keeping space before the contents. This is // enough to make the amortized cost of adding one element constant if // prepending leaves space at both ends. RIEGELI_ASSERT_LE(old_capacity / 2, std::numeric_limits::max() / (2 * sizeof(BlockPtr)) - old_capacity) << "Failed invariant of Chain: array of block pointers overflow, " "possibly blocks are too small"; const size_t new_capacity = UnsignedMax(PtrDistance(begin_, end_) + extra_capacity, old_capacity + old_capacity / 2, size_t{16}); BlockPtr* const new_allocated_begin = NewBlockPtrs(new_capacity); BlockPtr* const new_allocated_end = new_allocated_begin + new_capacity; BlockPtr* const new_begin = new_allocated_begin; BlockPtr* const new_end = new_begin + size; std::memcpy(new_begin, begin_, size * sizeof(BlockPtr)); if (has_allocated()) { std::memcpy(new_begin + new_capacity, begin_ + old_capacity, size * sizeof(BlockPtr)); } else if (size >= 1) { RIEGELI_ASSERT_LE(size, 2u) << "Failed invariant of Chain: " "only two block pointers fit without allocating their array"; new_begin[new_capacity].block_offset = 0; if (size == 2) { new_begin[new_capacity + 1].block_offset = new_begin[0].block_ptr->size(); } } DeleteBlockPtrs(); block_ptrs_.allocated.begin = new_allocated_begin; block_ptrs_.allocated.end = new_allocated_end; begin_ = new_begin; end_ = new_end; } inline void Chain::ReserveFrontSlow(size_t extra_capacity) { RIEGELI_ASSERT_GT(extra_capacity, 0u) << "Failed precondition of Chain::ReserveFrontSlow(): " "nothing to do, use ReserveFront() instead"; BlockPtr* old_allocated_begin; BlockPtr* old_allocated_end; if (has_here()) { if (ABSL_PREDICT_TRUE(extra_capacity <= 
PtrDistance(end_, block_ptrs_.here + 2))) { // There is space without reallocation. Shift 1 block pointer to the right // by 1, or 0 block pointers by 1 or 2, because `begin_` must remain at // `block_ptrs_.here`. There might be no pointer to copy; it is cheaper to // copy the array slot unconditionally. block_ptrs_.here[1] = block_ptrs_.here[0]; begin_ += extra_capacity; end_ += extra_capacity; return; } old_allocated_begin = block_ptrs_.here; old_allocated_end = end_; } else { old_allocated_begin = block_ptrs_.allocated.begin; old_allocated_end = block_ptrs_.allocated.end; } RIEGELI_ASSERT_GT(extra_capacity, PtrDistance(old_allocated_begin, begin_)) << "Failed precondition of Chain::ReserveFrontSlow(): " "extra capacity fits in allocated space, use ReserveFront() instead"; RIEGELI_ASSERT_LE(extra_capacity, std::numeric_limits::max() / (2 * sizeof(BlockPtr)) - PtrDistance(begin_, old_allocated_end)) << "Failed invariant of Chain: array of block pointers overflow, " "possibly blocks are too small"; const size_t old_capacity = PtrDistance(old_allocated_begin, old_allocated_end); const size_t size = PtrDistance(begin_, end_); if (size + extra_capacity <= old_capacity && 2 * size <= old_capacity) { RIEGELI_ASSERT(has_allocated()) << "The case of has_here() if there is space without reallocation " "was handled above"; // Existing array has enough capacity and is at most half full: move // contents to the middle of the array. This makes the amortized cost of // adding one element constant. BlockPtr* const new_begin = old_allocated_begin + (old_capacity - size + extra_capacity) / 2; // Moving right, so block offsets must be moved before block pointers. std::memmove(new_begin + old_capacity, begin_ + old_capacity, size * sizeof(BlockPtr)); std::memmove(new_begin, begin_, size * sizeof(BlockPtr)); begin_ = new_begin; end_ = new_begin + size; return; } // Reallocate the array, keeping space after the contents unchanged. 
This // makes the amortized cost of adding one element constant. RIEGELI_ASSERT_LE(old_capacity / 2, std::numeric_limits::max() / (2 * sizeof(BlockPtr)) - old_capacity) << "Failed invariant of Chain: array of block pointers overflow, " "possibly blocks are too small"; const size_t new_capacity = UnsignedMax(PtrDistance(begin_, old_allocated_end) + extra_capacity, old_capacity + old_capacity / 2, size_t{16}); BlockPtr* const new_allocated_begin = NewBlockPtrs(new_capacity); BlockPtr* const new_allocated_end = new_allocated_begin + new_capacity; BlockPtr* const new_end = new_allocated_end - PtrDistance(end_, old_allocated_end); BlockPtr* const new_begin = new_end - size; std::memcpy(new_begin, begin_, size * sizeof(BlockPtr)); if (has_allocated()) { std::memcpy(new_begin + new_capacity, begin_ + old_capacity, size * sizeof(BlockPtr)); } else if (size >= 1) { RIEGELI_ASSERT_LE(size, 2u) << "Failed invariant of Chain: " "only two block pointers fit without allocating their array"; new_begin[new_capacity].block_offset = 0; if (size == 2) { new_begin[new_capacity + 1].block_offset = new_begin[0].block_ptr->size(); } } DeleteBlockPtrs(); block_ptrs_.allocated.begin = new_allocated_begin; block_ptrs_.allocated.end = new_allocated_end; begin_ = new_begin; end_ = new_end; } inline size_t Chain::NewBlockCapacity(size_t replaced_length, size_t min_length, size_t recommended_length, Options options) const { RIEGELI_ASSERT_LE(replaced_length, size_) << "Failed precondition of Chain::NewBlockCapacity(): " "length to replace greater than current size"; RIEGELI_ASSERT_LE(min_length, RawBlock::kMaxCapacity - replaced_length) << "Chain block capacity overflow"; return replaced_length + ApplyBufferConstraints( ApplySizeHint( UnsignedMax(size_, SaturatingSub(options.min_block_size(), replaced_length)), options.size_hint(), size_), min_length, recommended_length, SaturatingSub(options.max_block_size(), replaced_length)); } absl::Span Chain::AppendBuffer(size_t min_length, size_t 
recommended_length, size_t max_length, Options options) { RIEGELI_ASSERT_LE(min_length, max_length) << "Failed precondition of Chain::AppendBuffer(): " "min_length > max_length"; RIEGELI_CHECK_LE(min_length, std::numeric_limits::max() - size_) << "Failed precondition of Chain::AppendBuffer(): " "Chain size overflow"; if (begin_ == end_) { RIEGELI_ASSERT_LE(size_, kMaxShortDataSize) << "Failed invariant of Chain: short data size too large"; if (min_length <= kMaxShortDataSize - size_) { // Do not bother returning short data if `recommended_length` or // `size_hint` is larger, because data will likely need to be copied later // to a real block. if (recommended_length <= kMaxShortDataSize - size_ && (options.size_hint() == std::nullopt || *options.size_hint() <= kMaxShortDataSize)) { // Append the new space to short data. EnsureHasHere(); const absl::Span buffer( short_data_begin() + size_, UnsignedMin(max_length, kMaxShortDataSize - size_)); size_ += buffer.size(); return buffer; } else if (min_length == 0) { return absl::Span(); } } // Merge short data with the new space to a new block. IntrusiveSharedPtr block; if (ABSL_PREDICT_FALSE(min_length > RawBlock::kMaxCapacity - size_)) { block = RawBlock::NewInternal(kMaxShortDataSize); block->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); PushBack(std::move(block)); block = RawBlock::NewInternal( NewBlockCapacity(0, min_length, recommended_length, options)); } else { block = RawBlock::NewInternal(NewBlockCapacity( size_, UnsignedMax(min_length, kMaxShortDataSize - size_), recommended_length, options)); block->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); } PushBack(std::move(block)); } else { if (back()->can_append(min_length)) { // New space can be appended in place. } else if (min_length == 0) { return absl::Span(); } else if (back()->tiny() && ABSL_PREDICT_TRUE(min_length <= RawBlock::kMaxCapacity - back()->size())) { // The last block must be rewritten. 
Merge it with the new space to a // new block. IntrusiveSharedPtr block = RawBlock::NewInternal(NewBlockCapacity(back()->size(), min_length, recommended_length, options)); block->Append(*back()); SetBack(std::move(block)); } else { IntrusiveSharedPtr block; if (back()->wasteful()) { // The last block must be rewritten. Rewrite it separately from the new // block to avoid rewriting the same data again if the new block gets // only partially filled. IntrusiveSharedPtr last = SetBack(back()->Copy()); if (last->TryClear() && last->can_append(min_length)) { // Reuse this block. block = std::move(last); } } if (block == nullptr) { // Append a new block. block = RawBlock::NewInternal( NewBlockCapacity(0, min_length, recommended_length, options)); } PushBack(std::move(block)); } } const absl::Span buffer = back()->AppendBuffer( UnsignedMin(max_length, std::numeric_limits::max() - size_)); RIEGELI_ASSERT_GE(buffer.size(), min_length) << "Chain::RawBlock::AppendBuffer() returned less than the free space"; size_ += buffer.size(); return buffer; } absl::Span Chain::PrependBuffer(size_t min_length, size_t recommended_length, size_t max_length, Options options) { RIEGELI_ASSERT_LE(min_length, max_length) << "Failed precondition of Chain::PrependBuffer(): " "min_length > max_length"; RIEGELI_CHECK_LE(min_length, std::numeric_limits::max() - size_) << "Failed precondition of Chain::PrependBuffer(): " "Chain size overflow"; if (begin_ == end_) { RIEGELI_ASSERT_LE(size_, kMaxShortDataSize) << "Failed invariant of Chain: short data size too large"; if (min_length <= kMaxShortDataSize - size_) { // Do not bother returning short data if `recommended_length` or // `size_hint` is larger, because data will likely need to be copied later // to a real block. if (recommended_length <= kMaxShortDataSize - size_ && (options.size_hint() == std::nullopt || *options.size_hint() <= kMaxShortDataSize)) { // Prepend the new space to short data. 
EnsureHasHere(); const absl::Span buffer( short_data_begin(), UnsignedMin(max_length, kMaxShortDataSize - size_)); std::memmove(buffer.data() + buffer.size(), short_data_begin(), size_); size_ += buffer.size(); return buffer; } else if (min_length == 0) { return absl::Span(); } } // Merge short data with the new space to a new block. IntrusiveSharedPtr block; if (ABSL_PREDICT_FALSE(min_length > RawBlock::kMaxCapacity - size_)) { block = RawBlock::NewInternal(kMaxShortDataSize); block->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); PushFront(std::move(block)); block = RawBlock::NewInternal( NewBlockCapacity(0, min_length, recommended_length, options)); } else { block = RawBlock::NewInternal( NewBlockCapacity(size_, min_length, recommended_length, options)); block->Prepend(short_data()); } PushFront(std::move(block)); } else { if (front()->can_prepend(min_length)) { // New space can be prepended in place. } else if (min_length == 0) { return absl::Span(); } else if (front()->tiny() && ABSL_PREDICT_TRUE(min_length <= RawBlock::kMaxCapacity - front()->size())) { // The first block must be rewritten. Merge it with the new space to a // new block. IntrusiveSharedPtr block = RawBlock::NewInternal(NewBlockCapacity(front()->size(), min_length, recommended_length, options)); block->Prepend(*front()); SetFront(std::move(block)); } else { IntrusiveSharedPtr block; if (front()->wasteful()) { // The first block must be rewritten. Rewrite it separately from the new // block to avoid rewriting the same data again if the new block gets // only partially filled. IntrusiveSharedPtr first = SetFrontSameSize(front()->Copy()); if (first->TryClear() && first->can_prepend(min_length)) { // Reuse this block. block = std::move(first); } } if (block == nullptr) { // Prepend a new block. 
block = RawBlock::NewInternal( NewBlockCapacity(0, min_length, recommended_length, options)); } PushFront(std::move(block)); } } const absl::Span buffer = front()->PrependBuffer( UnsignedMin(max_length, std::numeric_limits::max() - size_)); RIEGELI_ASSERT_GE(buffer.size(), min_length) << "Chain::RawBlock::PrependBuffer() returned less than the free space"; RefreshFront(); size_ += buffer.size(); return buffer; } void Chain::Append(BytesRef src, Options options) { while (!src.empty()) { const absl::Span buffer = AppendBuffer(1, src.size(), src.size(), options); std::memcpy(buffer.data(), src.data(), buffer.size()); src.remove_prefix(buffer.size()); } } void Chain::Append(const Chain& src, Options options) { AppendChain(src, options); } void Chain::Append(Chain&& src, Options options) { AppendChain(std::move(src), options); } template inline void Chain::AppendChain(ChainRef&& src, Options options) { if (src.begin_ == src.end_) { Append(src.short_data(), options); return; } RIEGELI_CHECK_LE(src.size(), std::numeric_limits::max() - size_) << "Failed precondition of Chain::Append(Chain): " "Chain size overflow"; const BlockPtr* src_iter = src.begin_; // If the first block of `src` is handled specially, // `(src_iter++)->block_ptr->Unref()` skips it so that // `AppendBlocks()` does not append it again. if (begin_ == end_) { if (src.front()->tiny() || (src.end_ - src.begin_ > 1 && src.front()->wasteful())) { // The first block of `src` must be rewritten. Merge short data with it to // a new block. if (!short_data().empty() || !src.front()->empty()) { RIEGELI_ASSERT_LE(src.front()->size(), RawBlock::kMaxCapacity - size_) << "Sum of sizes of short data and a tiny or wasteful block " "exceeds RawBlock::kMaxCapacity"; const size_t capacity = src.end_ - src.begin_ == 1 ? 
NewBlockCapacity(size_, UnsignedMax(src.front()->size(), kMaxShortDataSize - size_), 0, options) : UnsignedMax(size_ + src.front()->size(), kMaxShortDataSize); IntrusiveSharedPtr merged = RawBlock::NewInternal(capacity); merged->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); merged->Append(*src.front()); PushBack(std::move(merged)); } (src_iter++)->block_ptr->Unref(); } else if (!empty()) { // Copy short data to a real block. IntrusiveSharedPtr real = RawBlock::NewInternal(kMaxShortDataSize); real->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); PushBack(std::move(real)); } } else { if (back()->tiny() && src.front()->tiny()) { merge: // Boundary blocks must be merged, or they are both empty or wasteful so // merging them is cheaper than rewriting them separately. if (back()->empty() && src.front()->empty()) { PopBack(); } else if (back()->can_append(src.front()->size()) && (src.end_ - src.begin_ == 1 || !back()->wasteful(src.front()->size()))) { // Boundary blocks can be appended in place; this is always cheaper than // merging them to a new block. back()->Append(*src.front()); } else { // Boundary blocks cannot be appended in place. Merge them to a new // block. RIEGELI_ASSERT_LE(src.front()->size(), RawBlock::kMaxCapacity - back()->size()) << "Sum of sizes of two tiny or wasteful blocks exceeds " "RawBlock::kMaxCapacity"; const size_t capacity = src.end_ - src.begin_ == 1 ? NewBlockCapacity(back()->size(), src.front()->size(), 0, options) : back()->size() + src.front()->size(); IntrusiveSharedPtr merged = RawBlock::NewInternal(capacity); merged->Append(*back()); merged->Append(*src.front()); SetBack(std::move(merged)); } (src_iter++)->block_ptr->Unref(); } else if (back()->empty()) { if (src.end_ - src.begin_ > 1 && src.front()->wasteful()) goto merge; // The last block is empty and must be removed. 
PopBack(); } else if (back()->wasteful()) { if (src.end_ - src.begin_ > 1 && (src.front()->empty() || src.front()->wasteful())) { goto merge; } // The last block must reduce waste. if (back()->can_append(src.front()->size()) && (src.end_ - src.begin_ == 1 || !back()->wasteful(src.front()->size())) && src.front()->size() <= kAllocationCost + back()->size()) { // Appending in place is possible and is cheaper than rewriting the last // block. back()->Append(*src.front()); (src_iter++)->block_ptr->Unref(); } else { // Appending in place is not possible, or rewriting the last block is // cheaper. SetBack(back()->Copy()); } } else if (src.end_ - src.begin_ > 1) { if (src.front()->empty()) { // The first block of `src` is empty and must be skipped. (src_iter++)->block_ptr->Unref(); } else if (src.front()->wasteful()) { // The first block of `src` must reduce waste. if (back()->can_append(src.front()->size()) && !back()->wasteful(src.front()->size())) { // Appending in place is possible; this is always cheaper than // rewriting the first block of `src`. back()->Append(*src.front()); } else { // Appending in place is not possible. PushBack(src.front()->Copy()); } (src_iter++)->block_ptr->Unref(); } } } size_ += src.size_; AppendBlocks(src_iter, src.end_); src.DropPassedBlocks(Ownership()); } void Chain::Append(const Block& src, Options options) { if (src.raw_block() != nullptr) AppendRawBlock(src.raw_block(), options); } void Chain::Append(Block&& src, Options options) { if (src.raw_block() != nullptr) { AppendRawBlock(std::move(src).raw_block(), options); } } template inline void Chain::AppendRawBlock(RawBlockPtrRef&& block, Options options) { RIEGELI_CHECK_LE(block->size(), std::numeric_limits::max() - size_) << "Failed precondition of Chain::Append(Block): " "Chain size overflow"; if (begin_ == end_) { if (!short_data().empty()) { if (block->tiny()) { // The block must be rewritten. Merge short data with it to a new block. 
RIEGELI_ASSERT_LE(block->size(), RawBlock::kMaxCapacity - size_) << "Sum of sizes of short data and a tiny block exceeds " "RawBlock::kMaxCapacity"; const size_t capacity = NewBlockCapacity( size_, UnsignedMax(block->size(), kMaxShortDataSize - size_), 0, options); IntrusiveSharedPtr merged = RawBlock::NewInternal(capacity); merged->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); merged->Append(*block); PushBack(std::move(merged)); size_ += block->size(); return; } // Copy short data to a real block. IntrusiveSharedPtr real = RawBlock::NewInternal(kMaxShortDataSize); real->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); PushBack(std::move(real)); } } else { if (back()->tiny() && block->tiny()) { // Boundary blocks must be merged. if (back()->can_append(block->size())) { // Boundary blocks can be appended in place; this is always cheaper than // merging them to a new block. back()->Append(*block); } else { // Boundary blocks cannot be appended in place. Merge them to a new // block. RIEGELI_ASSERT_LE(block->size(), RawBlock::kMaxCapacity - back()->size()) << "Sum of sizes of two tiny blocks exceeds RawBlock::kMaxCapacity"; IntrusiveSharedPtr merged = RawBlock::NewInternal( NewBlockCapacity(back()->size(), block->size(), 0, options)); merged->Append(*back()); merged->Append(*block); SetBack(std::move(merged)); } size_ += block->size(); return; } if (back()->empty()) { // The last block is empty and must be removed. size_ += block->size(); SetBack(std::forward(block)); return; } if (back()->wasteful()) { // The last block must reduce waste. if (back()->can_append(block->size()) && block->size() <= kAllocationCost + back()->size()) { // Appending in place is possible and is cheaper than rewriting the last // block. back()->Append(*block); size_ += block->size(); return; } // Appending in place is not possible, or rewriting the last block is // cheaper. 
SetBack(back()->Copy()); } } size_ += block->size(); PushBack(std::forward(block)); } void Chain::Append(const absl::Cord& src, Options options) { AppendCord(src, options); } void Chain::Append(absl::Cord&& src, Options options) { AppendCord(std::move(src), options); } template void Chain::AppendCord(CordRef&& src, Options options) { if (const std::optional flat = src.TryFlat(); flat != std::nullopt) { if (flat->size() <= MaxBytesToCopy(options)) { Append(*flat, options); } else { Append(Block(riegeli::Maker(std::forward(src))), options); } return; } AppendCordSlow(std::forward(src), options); } template inline void Chain::AppendCordSlow(CordRef&& src, Options options) { // Avoid creating wasteful blocks and then rewriting them: append copied // fragments when their accumulated size is known, tweaking `size_hint` for // block sizing. absl::InlinedVector copied_fragments; Options copy_options = options; copy_options.set_size_hint(size()); absl::Cord::CharIterator iter = src.char_begin(); while (iter != src.char_end()) { const absl::string_view fragment = absl::Cord::ChunkRemaining(iter); if (fragment.size() <= kMaxBytesToCopy) { copied_fragments.push_back(fragment); copy_options.set_size_hint(*copy_options.size_hint() + fragment.size()); absl::Cord::Advance(&iter, fragment.size()); } else { for (const absl::string_view copied_fragment : copied_fragments) { Append(copied_fragment, copy_options); } copied_fragments.clear(); Append(Block(riegeli::Maker( riegeli::Invoker([&iter, size = fragment.size()]() { return absl::Cord::AdvanceAndRead(&iter, size); }))), options); copy_options.set_size_hint(size()); } } for (const absl::string_view copied_fragment : copied_fragments) { Append(copied_fragment, options); } } void Chain::Prepend(BytesRef src, Options options) { while (!src.empty()) { const absl::Span buffer = PrependBuffer(1, src.size(), src.size(), options); std::memcpy(buffer.data(), src.data() + (src.size() - buffer.size()), buffer.size()); 
src.remove_suffix(buffer.size()); } } void Chain::Prepend(const Chain& src, Options options) { PrependChain(src, options); } void Chain::Prepend(Chain&& src, Options options) { PrependChain(std::move(src), options); } template inline void Chain::PrependChain(ChainRef&& src, Options options) { if (src.begin_ == src.end_) { Prepend(src.short_data(), options); return; } RIEGELI_CHECK_LE(src.size(), std::numeric_limits::max() - size_) << "Failed precondition of Chain::Prepend(Chain): " "Chain size overflow"; const BlockPtr* src_iter = src.end_; // If the last block of src is handled specially, // `(--src_iter)->block_ptr->Unref()` skips it so that // `PrependBlocks()` does not prepend it again. if (begin_ == end_) { if (src.back()->tiny() || (src.end_ - src.begin_ > 1 && src.back()->wasteful())) { // The last block of `src` must be rewritten. Merge short data with it to // a new block. if (!short_data().empty() || !src.back()->empty()) { RIEGELI_ASSERT_LE(src.back()->size(), RawBlock::kMaxCapacity - size_) << "Sum of sizes of short data and a tiny or wasteful block " "exceeds RawBlock::kMaxCapacity"; const size_t capacity = src.end_ - src.begin_ == 1 ? NewBlockCapacity(size_, src.back()->size(), 0, options) : size_ + src.back()->size(); IntrusiveSharedPtr merged = RawBlock::NewInternal(capacity); merged->Prepend(short_data()); merged->Prepend(*src.back()); PushFront(std::move(merged)); } (--src_iter)->block_ptr->Unref(); } else if (!empty()) { // Copy short data to a real block. IntrusiveSharedPtr real = RawBlock::NewInternal(kMaxShortDataSize); real->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); PushFront(std::move(real)); } } else { if (front()->tiny() && src.back()->tiny()) { merge: // Boundary blocks must be merged, or they are both empty or wasteful so // merging them is cheaper than rewriting them separately. 
if (src.back()->empty() && front()->empty()) { PopFront(); } else if (front()->can_prepend(src.back()->size()) && (src.end_ - src.begin_ == 1 || !front()->wasteful(src.back()->size()))) { // Boundary blocks can be prepended in place; this is always cheaper // than merging them to a new block. front()->Prepend(*src.back()); RefreshFront(); } else { // Boundary blocks cannot be prepended in place. Merge them to a new // block. RIEGELI_ASSERT_LE(src.back()->size(), RawBlock::kMaxCapacity - front()->size()) << "Sum of sizes of two tiny or wasteful blocks exceeds " "RawBlock::kMaxCapacity"; const size_t capacity = src.end_ - src.begin_ == 1 ? NewBlockCapacity(front()->size(), src.back()->size(), 0, options) : front()->size() + src.back()->size(); IntrusiveSharedPtr merged = RawBlock::NewInternal(capacity); merged->Prepend(*front()); merged->Prepend(*src.back()); SetFront(std::move(merged)); } (--src_iter)->block_ptr->Unref(); } else if (front()->empty()) { if (src.end_ - src.begin_ > 1 && src.back()->wasteful()) goto merge; // The first block is empty and must be removed. PopFront(); } else if (front()->wasteful()) { if (src.end_ - src.begin_ > 1 && (src.back()->empty() || src.back()->wasteful())) { goto merge; } // The first block must reduce waste. if (front()->can_prepend(src.back()->size()) && (src.end_ - src.begin_ == 1 || !front()->wasteful(src.back()->size())) && src.back()->size() <= kAllocationCost + front()->size()) { // Prepending in place is possible and is cheaper than rewriting the // first block. front()->Prepend(*src.back()); RefreshFront(); (--src_iter)->block_ptr->Unref(); } else { // Prepending in place is not possible, or rewriting the first block is // cheaper. SetFrontSameSize(front()->Copy()); } } else if (src.end_ - src.begin_ > 1) { if (src.back()->empty()) { // The last block of `src` is empty and must be skipped. (--src_iter)->block_ptr->Unref(); } else if (src.back()->wasteful()) { // The last block of `src` must reduce waste. 
if (front()->can_prepend(src.back()->size()) && !front()->wasteful(src.back()->size())) { // Prepending in place is possible; this is always cheaper than // rewriting the last block of `src`. front()->Prepend(*src.back()); RefreshFront(); } else { // Prepending in place is not possible. PushFront(src.back()->Copy()); } (--src_iter)->block_ptr->Unref(); } } } size_ += src.size_; PrependBlocks(src.begin_, src_iter); src.DropPassedBlocks(Ownership()); } void Chain::Prepend(const Block& src, Options options) { if (src.raw_block() != nullptr) PrependRawBlock(src.raw_block(), options); } void Chain::Prepend(Block&& src, Options options) { if (src.raw_block() != nullptr) { PrependRawBlock(std::move(src).raw_block(), options); } } template inline void Chain::PrependRawBlock(RawBlockPtrRef&& block, Options options) { RIEGELI_CHECK_LE(block->size(), std::numeric_limits::max() - size_) << "Failed precondition of Chain::Prepend(Block): " "Chain size overflow"; if (begin_ == end_) { if (!short_data().empty()) { if (block->tiny()) { // The block must be rewritten. Merge short data with it to a new block. RIEGELI_ASSERT_LE(block->size(), RawBlock::kMaxCapacity - size_) << "Sum of sizes of short data and a tiny block exceeds " "RawBlock::kMaxCapacity"; const size_t capacity = NewBlockCapacity(size_, block->size(), 0, options); IntrusiveSharedPtr merged = RawBlock::NewInternal(capacity); merged->Prepend(short_data()); merged->Prepend(*block); PushFront(std::move(merged)); size_ += block->size(); return; } // Copy short data to a real block. IntrusiveSharedPtr real = RawBlock::NewInternal(kMaxShortDataSize); real->AppendWithExplicitSizeToCopy(short_data(), kMaxShortDataSize); PushFront(std::move(real)); } } else { if (front()->tiny() && block->tiny()) { // Boundary blocks must be merged. if (front()->can_prepend(block->size())) { // Boundary blocks can be prepended in place; this is always cheaper // than merging them to a new block. 
front()->Prepend(*block); RefreshFront(); } else { // Boundary blocks cannot be prepended in place. Merge them to a new // block. RIEGELI_ASSERT_LE(block->size(), RawBlock::kMaxCapacity - front()->size()) << "Sum of sizes of two tiny blocks exceeds RawBlock::kMaxCapacity"; IntrusiveSharedPtr merged = RawBlock::NewInternal( NewBlockCapacity(front()->size(), block->size(), 0, options)); merged->Prepend(*front()); merged->Prepend(*block); SetFront(std::move(merged)); } size_ += block->size(); return; } if (front()->empty()) { // The first block is empty and must be removed. size_ += block->size(); SetFront(std::forward(block)); return; } if (front()->wasteful()) { // The first block must reduce waste. if (front()->can_prepend(block->size()) && block->size() <= kAllocationCost + front()->size()) { // Prepending in place is possible and is cheaper than rewriting the // first block. front()->Prepend(*block); RefreshFront(); size_ += block->size(); return; } // Prepending in place is not possible, or rewriting the first block is // cheaper. SetFrontSameSize(front()->Copy()); } } size_ += block->size(); PushFront(std::forward(block)); } void Chain::Prepend(const absl::Cord& src, Options options) { PrependCord(src, options); } void Chain::Prepend(absl::Cord&& src, Options options) { PrependCord(std::move(src), options); } template inline void Chain::PrependCord(CordRef&& src, Options options) { if (src.size() <= MaxBytesToCopy(options)) { if (const std::optional flat = src.TryFlat(); flat != std::nullopt) { Prepend(*flat, options); return; } } Prepend(Chain(std::forward(src)), options); } void Chain::AppendFrom(absl::Cord::CharIterator& iter, size_t length, Options options) { // Avoid creating wasteful blocks and then rewriting them: append copied // fragments when their accumulated size is known, tweaking `size_hint` for // block sizing. 
absl::InlinedVector copied_fragments; Options copy_options = options; copy_options.set_size_hint(size()); while (length > 0) { absl::string_view fragment = absl::Cord::ChunkRemaining(iter); fragment = absl::string_view(fragment.data(), UnsignedMin(fragment.size(), length)); if (fragment.size() <= kMaxBytesToCopy) { copied_fragments.push_back(fragment); copy_options.set_size_hint(*copy_options.size_hint() + fragment.size()); absl::Cord::Advance(&iter, fragment.size()); } else { for (const absl::string_view copied_fragment : copied_fragments) { Append(copied_fragment, copy_options); } copied_fragments.clear(); Append(Block(riegeli::Maker( riegeli::Invoker([&iter, size = fragment.size()]() { return absl::Cord::AdvanceAndRead(&iter, size); }))), options); copy_options.set_size_hint(size()); } length -= fragment.size(); } for (const absl::string_view copied_fragment : copied_fragments) { Append(copied_fragment, options); } } void Chain::RemoveSuffix(size_t length, Options options) { if (length == 0) return; RIEGELI_CHECK_LE(length, size()) << "Failed precondition of Chain::RemoveSuffix(): " << "length to remove greater than current size"; size_ -= length; if (begin_ == end_) { // `Chain` has short data which have suffix removed in place. return; } while (length > back()->size()) { length -= back()->size(); PopBack(); RIEGELI_ASSERT_NE(begin_, end_) << "Failed invariant of Chain: " "sum of block sizes smaller than Chain size"; } if (back()->TryRemoveSuffix(length)) { if (end_ - begin_ > 1 && back()->tiny() && end_[-2].block_ptr->tiny()) { // Last two blocks must be merged. 
IntrusiveSharedPtr last = PopBack(); if (!last->empty()) { RIEGELI_ASSERT_LE(last->size(), RawBlock::kMaxCapacity - back()->size()) << "Sum of sizes of two tiny blocks exceeds " "RawBlock::kMaxCapacity"; IntrusiveSharedPtr merged = RawBlock::NewInternal( NewBlockCapacity(back()->size() + last->size(), 0, 0, options)); merged->Append(*back()); merged->Append(*last); SetBack(std::move(merged)); } } return; } IntrusiveSharedPtr last = PopBack(); if (length == last->size()) return; absl::string_view data = *last; data.remove_suffix(length); // Compensate for increasing `size_` by `Append()`. size_ -= data.size(); Append(ExternalRef(riegeli::Invoker(MakeBlock(), std::move(last)), data), options); } void Chain::RemovePrefix(size_t length, Options options) { if (length == 0) return; RIEGELI_CHECK_LE(length, size()) << "Failed precondition of Chain::RemovePrefix(): " << "length to remove greater than current size"; size_ -= length; if (begin_ == end_) { // `Chain` has short data which have prefix removed by shifting the rest. std::memmove(short_data_begin(), short_data_begin() + length, size_); return; } while (length > front()->size()) { length -= front()->size(); PopFront(); RIEGELI_ASSERT_NE(begin_, end_) << "Failed invariant of Chain: " "sum of block sizes smaller than Chain size"; } if (front()->TryRemovePrefix(length)) { RefreshFront(); if (end_ - begin_ > 1 && front()->tiny() && begin_[1].block_ptr->tiny()) { // First two blocks must be merged. 
IntrusiveSharedPtr first = PopFront(); if (!first->empty()) { RIEGELI_ASSERT_LE(first->size(), RawBlock::kMaxCapacity - front()->size()) << "Sum of sizes of two tiny blocks exceeds " "RawBlock::kMaxCapacity"; IntrusiveSharedPtr merged = RawBlock::NewInternal( NewBlockCapacity(first->size() + front()->size(), 0, 0, options)); merged->Prepend(*front()); merged->Prepend(*first); SetFront(std::move(merged)); } } return; } IntrusiveSharedPtr first = PopFront(); if (length == first->size()) return; absl::string_view data = *first; data.remove_prefix(length); // Compensate for increasing `size_` by `Prepend()`. size_ -= data.size(); Prepend(ExternalRef(riegeli::Invoker(MakeBlock(), std::move(first)), data), options); } void swap(Chain& a, Chain& b) noexcept { using std::swap; if (a.has_here()) { a.begin_ = b.block_ptrs_.here + (a.begin_ - a.block_ptrs_.here); a.end_ = b.block_ptrs_.here + (a.end_ - a.block_ptrs_.here); } if (b.has_here()) { b.begin_ = a.block_ptrs_.here + (b.begin_ - b.block_ptrs_.here); b.end_ = a.block_ptrs_.here + (b.end_ - b.block_ptrs_.here); } swap(a.block_ptrs_, b.block_ptrs_); swap(a.begin_, b.begin_); swap(a.end_, b.end_); swap(a.size_, b.size_); } StrongOrdering Chain::Compare(const Chain& a, const Chain& b) { BlockIterator a_iter = a.blocks().cbegin(); BlockIterator b_iter = b.blocks().cbegin(); size_t this_pos = 0; size_t that_pos = 0; while (a_iter != a.blocks().cend()) { if (b_iter == b.blocks().cend()) { do { if (!a_iter->empty()) return StrongOrdering::greater; ++a_iter; } while (a_iter != a.blocks().cend()); return StrongOrdering::equal; } const size_t length = UnsignedMin(a_iter->size() - this_pos, b_iter->size() - that_pos); if (const int ordering = std::memcmp(a_iter->data() + this_pos, b_iter->data() + that_pos, length); ordering != 0) { return AsStrongOrdering(ordering); } this_pos += length; if (this_pos == a_iter->size()) { ++a_iter; this_pos = 0; } that_pos += length; if (that_pos == b_iter->size()) { ++b_iter; that_pos = 0; } } 
while (b_iter != b.blocks().cend()) { if (!b_iter->empty()) return StrongOrdering::less; ++b_iter; } return StrongOrdering::equal; } StrongOrdering Chain::Compare(const Chain& a, absl::string_view b) { BlockIterator a_iter = a.blocks().cbegin(); size_t this_pos = 0; size_t that_pos = 0; while (a_iter != a.blocks().cend()) { if (that_pos == b.size()) { do { if (!a_iter->empty()) return StrongOrdering::greater; ++a_iter; } while (a_iter != a.blocks().cend()); return StrongOrdering::equal; } const size_t length = UnsignedMin(a_iter->size() - this_pos, b.size() - that_pos); if (const int ordering = std::memcmp(a_iter->data() + this_pos, b.data() + that_pos, length); ordering != 0) { return AsStrongOrdering(ordering); } this_pos += length; if (this_pos == a_iter->size()) { ++a_iter; this_pos = 0; } that_pos += length; } return that_pos == b.size() ? StrongOrdering::equal : StrongOrdering::less; } void Chain::Output(std::ostream& dest) const { WriteWithPadding(dest, size(), [&] { for (const absl::string_view fragment : blocks()) { dest.write(fragment.data(), IntCast(fragment.size())); } }); } void Chain::VerifyInvariants() const { #if RIEGELI_DEBUG if (begin_ == end_) { if (has_here()) { RIEGELI_CHECK_LE(size(), kMaxShortDataSize); } else { RIEGELI_CHECK_EQ(size(), 0u); } } else { RIEGELI_CHECK_LE(begin_, end_); if (has_here()) { RIEGELI_CHECK_LE(PtrDistance(begin_, end_), 2u); } else { RIEGELI_CHECK_GE(begin_, block_ptrs_.allocated.begin); RIEGELI_CHECK_LE(end_, block_ptrs_.allocated.end); } bool is_tiny = false; size_t offset = has_allocated() ? 
begin_[block_offsets()].block_offset : size_t{0}; const BlockPtr* iter = begin_; do { if (is_tiny) { RIEGELI_CHECK(!iter->block_ptr->tiny()); is_tiny = false; } else { is_tiny = iter->block_ptr->tiny(); } if (iter != begin_ && iter != end_ - 1) { RIEGELI_CHECK(!iter->block_ptr->empty()); RIEGELI_CHECK(!iter->block_ptr->wasteful()); } if (has_allocated()) { RIEGELI_CHECK_EQ(iter[block_offsets()].block_offset, offset); } offset += iter->block_ptr->size(); ++iter; } while (iter != end_); if (has_allocated()) offset -= begin_[block_offsets()].block_offset; RIEGELI_CHECK_EQ(size(), offset); } #endif } } // namespace riegeli ================================================ FILE: riegeli/base/chain.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_CHAIN_H_ #define RIEGELI_BASE_CHAIN_H_ #include "riegeli/base/chain_base.h" // IWYU pragma: export #include "riegeli/base/chain_details.h" // IWYU pragma: export #include "riegeli/base/external_ref_base.h" // IWYU pragma: keep #include "riegeli/base/external_ref_support.h" // IWYU pragma: keep #endif // RIEGELI_BASE_CHAIN_H_ ================================================ FILE: riegeli/base/chain_base.h ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_CHAIN_BASE_H_ #define RIEGELI_BASE_CHAIN_BASE_H_ // IWYU pragma: private, include "riegeli/base/chain.h" #include #include #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/macros.h" #include "absl/strings/cord.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/buffering.h" #include "riegeli/base/bytes_ref.h" #include "riegeli/base/compare.h" #include "riegeli/base/external_data.h" #include "riegeli/base/external_ref_support.h" #include "riegeli/base/initializer.h" #include "riegeli/base/intrusive_shared_ptr.h" #include "riegeli/base/memory_estimator.h" #include "riegeli/base/ownership.h" #include "riegeli/base/ref_count.h" #include "riegeli/base/type_traits.h" namespace riegeli { class ExternalRef; // A `Chain` represents a sequence of bytes. It supports efficient appending and // prepending, and sharing memory with other `Chain`s and other types. It does // not support efficient random access. // // A `Chain` can be written using `ChainWriter` and `ChainBackwardWriter`, // and can be read using `ChainReader`. `Chain` itself exposes lower level // appending/prepending and iteration functions. // // A `Chain` is implemented with a sequence of blocks holding flat data // fragments. class Chain : public WithCompare { private: class RawBlock; // A union of either a block pointer or a block offset. 
Having a union makes // easier to allocate an array containing both kinds of data, with block // offsets following block pointers. union BlockPtr { RawBlock* block_ptr; size_t block_offset; }; static constexpr size_t kMaxShortDataSize = 2 * sizeof(BlockPtr); public: class Options { public: Options() noexcept {} // Expected final size, or `std::nullopt` if unknown. This may improve // performance and memory usage. // // If the size hint turns out to not match reality, nothing breaks. Options& set_size_hint(std::optional size_hint) & ABSL_ATTRIBUTE_LIFETIME_BOUND { if (size_hint == std::nullopt) { size_hint_ = std::numeric_limits::max(); } else { size_hint_ = UnsignedMin(*size_hint, std::numeric_limits::max() - 1); } return *this; } Options&& set_size_hint(std::optional size_hint) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(set_size_hint(size_hint)); } std::optional size_hint() const { if (size_hint_ == std::numeric_limits::max()) { return std::nullopt; } else { return size_hint_; } } // Minimal size of a block of allocated data. // // This is used initially, while the destination is small. // // Default: `kDefaultMinBlockSize` (512). Options& set_min_block_size(size_t min_block_size) & ABSL_ATTRIBUTE_LIFETIME_BOUND { min_block_size_ = UnsignedMin(min_block_size, uint32_t{1} << 31); return *this; } Options&& set_min_block_size(size_t min_block_size) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(set_min_block_size(min_block_size)); } size_t min_block_size() const { return min_block_size_; } // Maximal size of a block of allocated data. // // This is for performance tuning, not a guarantee: does not apply to // objects allocated separately and then appended to this `Chain`. // // Default: `kDefaultMaxBlockSize` (64K). 
Options& set_max_block_size(size_t max_block_size) & ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT_GT(max_block_size, 0u) << "Failed precondition of Chain::Options::set_max_block_size(): " "zero block size"; max_block_size_ = UnsignedMin(max_block_size, uint32_t{1} << 31); return *this; } Options&& set_max_block_size(size_t max_block_size) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(set_max_block_size(max_block_size)); } size_t max_block_size() const { return max_block_size_; } // A shortcut for `set_min_block_size(block_size)` with // `set_max_block_size(block_size)`. Options& set_block_size(size_t block_size) & ABSL_ATTRIBUTE_LIFETIME_BOUND { return set_min_block_size(block_size).set_max_block_size(block_size); } Options&& set_block_size(size_t block_size) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(set_block_size(block_size)); } private: // `std::nullopt` is encoded as `std::numeric_limits::max()` to // reduce object size. size_t size_hint_ = std::numeric_limits::max(); // Use `uint32_t` instead of `size_t` to reduce the object size. uint32_t min_block_size_ = uint32_t{kDefaultMinBlockSize}; uint32_t max_block_size_ = uint32_t{kDefaultMaxBlockSize}; }; class Block; class BlockRef; class BlockIterator; class Blocks; struct BlockAndChar; // A sentinel value for the `max_length` parameter of // `AppendBuffer()`/`PrependBuffer()`. static constexpr size_t kAnyLength = std::numeric_limits::max(); static constexpr size_t kMaxBytesToCopyToEmpty = kMaxShortDataSize; size_t MaxBytesToCopy(Options options = Options()) const { if (options.size_hint() != std::nullopt && size() < *options.size_hint()) { return UnsignedClamp(*options.size_hint() - size() - 1, kMaxBytesToCopyToEmpty, kMaxBytesToCopy); } if (empty()) return kMaxBytesToCopyToEmpty; return kMaxBytesToCopy; } // Allocated size of an external block containing an external object of type // `T`. 
template static constexpr size_t kExternalAllocatedSize(); constexpr Chain() = default; // Converts from a string-like type. explicit Chain(BytesRef src); explicit Chain(ExternalRef src); template ::value, int> = 0> explicit Chain(Src&& src); explicit Chain(Block src); explicit Chain(const absl::Cord& src); explicit Chain(absl::Cord&& src); Chain(const Chain& that); Chain& operator=(const Chain& that); // The source `Chain` is left cleared. // // Moving a `Chain` invalidates its `BlockIterator`s and data pointers, but // the shape of blocks (their number and sizes) remains unchanged. Chain(Chain&& that) noexcept; Chain& operator=(Chain&& that) noexcept; ~Chain(); // Makes `*this` equivalent to a newly constructed `Chain`. This avoids // constructing a temporary `Chain` and moving from it. ABSL_ATTRIBUTE_REINITIALIZES void Reset(); ABSL_ATTRIBUTE_REINITIALIZES void Reset(BytesRef src); ABSL_ATTRIBUTE_REINITIALIZES void Reset(ExternalRef src); template ::value, int> = 0> ABSL_ATTRIBUTE_REINITIALIZES void Reset(Src&& src); ABSL_ATTRIBUTE_REINITIALIZES void Reset(Block src); ABSL_ATTRIBUTE_REINITIALIZES void Reset(const absl::Cord& src); ABSL_ATTRIBUTE_REINITIALIZES void Reset(absl::Cord&& src); // Removes all data. ABSL_ATTRIBUTE_REINITIALIZES void Clear(); // A container of `absl::string_view` blocks comprising data of the `Chain`. Blocks blocks() const ABSL_ATTRIBUTE_LIFETIME_BOUND; bool empty() const { return size_ == 0; } size_t size() const { return size_; } void CopyTo(char* dest) const; void AppendTo(std::string& dest) const&; void AppendTo(std::string& dest) &&; void AppendTo(absl::Cord& dest) const&; void AppendTo(absl::Cord& dest) &&; void PrependTo(absl::Cord& dest) const&; void PrependTo(absl::Cord& dest) &&; explicit operator std::string() const&; explicit operator std::string() &&; explicit operator absl::Cord() const&; explicit operator absl::Cord() &&; // If the `Chain` contents are flat, returns them, otherwise returns // `std::nullopt`. 
std::optional TryFlat() const ABSL_ATTRIBUTE_LIFETIME_BOUND; // If the `Chain` contents are not flat, flattens them in place. Returns flat // contents. absl::string_view Flatten() ABSL_ATTRIBUTE_LIFETIME_BOUND; // Locates the block containing the given character position, and the // character index within the block. // // The opposite conversion is `Chain::BlockIterator::CharIndexInChain()`. // // Precondition: `char_index_in_chain <= size()` BlockAndChar BlockAndCharIndex(size_t char_index_in_chain) const; // Shows internal structure in a human-readable way, for debugging. void DumpStructure(std::ostream& dest) const; // Supports `MemoryEstimator`. friend void RiegeliRegisterSubobjects(const Chain* self, MemoryEstimator& memory_estimator) { self->RegisterSubobjects(memory_estimator); } // Appends/prepends some uninitialized space. The buffer will have length at // least `min_length`, preferably `recommended_length`, and at most // `max_length`. // // If `min_length == 0`, returns whatever space was already allocated // (possibly an empty buffer) without invalidating existing pointers. If the // `Chain` was empty then the empty contents can be moved. // // If `recommended_length < min_length`, `recommended_length` is assumed to be // `min_length`. // // If `max_length == kAnyLength`, there is no maximum. // // Precondition: `min_length <= max_length` absl::Span AppendBuffer( size_t min_length, size_t recommended_length = 0, size_t max_length = kAnyLength, Options options = Options()) ABSL_ATTRIBUTE_LIFETIME_BOUND; absl::Span PrependBuffer( size_t min_length, size_t recommended_length = 0, size_t max_length = kAnyLength, Options options = Options()) ABSL_ATTRIBUTE_LIFETIME_BOUND; // Equivalent to `AppendBuffer()`/`PrependBuffer()` with // `min_length == max_length`. 
absl::Span AppendFixedBuffer(size_t length, Options options = Options()) ABSL_ATTRIBUTE_LIFETIME_BOUND; absl::Span PrependFixedBuffer( size_t length, Options options = Options()) ABSL_ATTRIBUTE_LIFETIME_BOUND; // Appends/prepends a string-like type. void Append(BytesRef src, Options options = Options()); void Append(ExternalRef src); void Append(ExternalRef src, Options options); template ::value, int> = 0> void Append(Src&& src); template ::value, int> = 0> void Append(Src&& src, Options options); void Append(const Chain& src, Options options = Options()); void Append(Chain&& src, Options options = Options()); void Append(const Block& src, Options options = Options()); void Append(Block&& src, Options options = Options()); void Append(const absl::Cord& src, Options options = Options()); void Append(absl::Cord&& src, Options options = Options()); void Prepend(BytesRef src, Options options = Options()); void Prepend(ExternalRef src); void Prepend(ExternalRef src, Options options); template ::value, int> = 0> void Prepend(Src&& src); template ::value, int> = 0> void Prepend(Src&& src, Options options); void Prepend(const Chain& src, Options options = Options()); void Prepend(Chain&& src, Options options = Options()); void Prepend(const Block& src, Options options = Options()); void Prepend(Block&& src, Options options = Options()); void Prepend(const absl::Cord& src, Options options = Options()); void Prepend(absl::Cord&& src, Options options = Options()); // `AppendFrom(iter, length)` is equivalent to // `Append(absl::Cord::AdvanceAndRead(&iter, length))` but more efficient. void AppendFrom(absl::Cord::CharIterator& iter, size_t length, Options options = Options()); // Removes suffix/prefix of the given length. 
// // Precondition: `length <= size()` void RemoveSuffix(size_t length, Options options = Options()); void RemovePrefix(size_t length, Options options = Options()); friend void swap(Chain& a, Chain& b) noexcept; friend bool operator==(const Chain& a, const Chain& b) { return a.size() == b.size() && Compare(a, b) == 0; } friend StrongOrdering RIEGELI_COMPARE(const Chain& a, const Chain& b) { return Compare(a, b); } friend bool operator==(const Chain& a, absl::string_view b) { return a.size() == b.size() && Compare(a, b) == 0; } friend StrongOrdering RIEGELI_COMPARE(const Chain& a, absl::string_view b) { return Compare(a, b); } template friend HashState AbslHashValue(HashState hash_state, const Chain& self) { return self.HashValue(std::move(hash_state)); } // Default stringification by `absl::StrCat()` etc. template friend void AbslStringify(Sink& dest, const Chain& src) { src.Stringify(dest); } friend std::ostream& operator<<(std::ostream& dest, const Chain& src) { src.Output(dest); return dest; } // Supports `riegeli::Debug()`. template friend void RiegeliDebug(const Chain& src, DebugStream& dest) { src.Debug(dest); } // Supports `absl::Format(&chain, format, args...)`. friend void AbslFormatFlush(Chain* dest, absl::string_view src) { dest->Append(src); } // For testing. If `RIEGELI_DEBUG` is defined, verifies internal invariants, // otherwise does nothing. void VerifyInvariants() const; private: class BlockPtrPtr; struct MakeBlock; struct ExternalMethods; template struct ExternalMethodsFor; struct Empty {}; struct Allocated { // The extent of the allocated array of block pointers. This array is // immediately followed by the array of block offsets of the same size, // used for efficient finding of the block covering the given position. // Only some middle portion of each array is filled. 
// // The offset of the first block is not necessarily 0 but an arbitrary value // (with possible wrapping around the `size_t` range), to avoid having to // update all offsets in `Prepend()` or `RemovePrefix()`. BlockPtr* begin; BlockPtr* end; }; union BlockPtrs { constexpr BlockPtrs() noexcept : empty() {} // If the `Chain` is empty, no block pointers are needed. Some union member // is needed though for the default constructor to be constexpr. Empty empty; // If `begin_ == end_`, `size_` characters. // // If also `has_here()`, then there are 0 pointers in `here` so `short_data` // can safely contain `size_` characters. If also `has_allocated()`, then // `size_ == 0`, and `EnsureHasHere()` must be called before writing to // `short_data`. char short_data[kMaxShortDataSize]; // If `has_here()`, array of block pointers between `begin_` i.e. `here` and // `end_` (0 to 2 pointers). In this case block offsets are implicit. BlockPtr here[2]; // If `has_allocated()`, pointers to a heap-allocated array of block // pointers and block offsets. Allocated allocated; }; // When deciding whether to copy an array of bytes or perform a small memory // allocation, prefer copying up to this length. static constexpr size_t kAllocationCost = 512; bool ClearSlow(); absl::string_view FlattenSlow(); bool has_here() const { return begin_ == block_ptrs_.here; } bool has_allocated() const { return begin_ != block_ptrs_.here; } absl::string_view short_data() const; char* short_data_begin(); const char* short_data_begin() const; static BlockPtr* NewBlockPtrs(size_t capacity); void DeleteBlockPtrs(); // If `has_allocated()`, delete the block pointer array and make `has_here()` // `true`. This is used before appending to `short_data`. 
// // Precondition: `begin_ == end_` void EnsureHasHere(); void UnrefBlocks(); static void UnrefBlocks(const BlockPtr* begin, const BlockPtr* end); static void UnrefBlocksSlow(const BlockPtr* begin, const BlockPtr* end); void DropPassedBlocks(PassOwnership); void DropPassedBlocks(ShareOwnership) const; // The offset of the block offsets part of the block pointer array, in array // elements. size_t block_offsets() const { RIEGELI_ASSERT(has_allocated()) << "Failed precondition of block_offsets(): " "block pointer array is not allocated"; return PtrDistance(block_ptrs_.allocated.begin, block_ptrs_.allocated.end); } // Returns the last block. Can be changed in place (if its own constraints // allow that). RawBlock* const& back() const { return end_[-1].block_ptr; } // Returns the first block. If its size changes, this must be reflected in the // array of block offset, e.g. with `RefreshFront()`. RawBlock* const& front() const { return begin_[0].block_ptr; } void Initialize(absl::string_view src); void InitializeSlow(absl::string_view src); void Initialize(Block src); void Initialize(const absl::Cord& src); void Initialize(absl::Cord&& src); // This template is defined and used only in chain.cc. template void InitializeFromCord(CordRef&& src); void Initialize(const Chain& src); void CopyToSlow(char* dest) const; std::string ToString() const; void AppendToSlow(absl::Cord& dest) const&; void AppendToSlow(absl::Cord& dest) &&; void PrependToSlow(absl::Cord& dest) const&; void PrependToSlow(absl::Cord& dest) &&; IntrusiveSharedPtr SetBack(IntrusiveSharedPtr block); IntrusiveSharedPtr SetFront(IntrusiveSharedPtr block); // Like `SetFront()`, but skips the `RefreshFront()` step. This is enough if // the block has the same size as the block being replaced. IntrusiveSharedPtr SetFrontSameSize( IntrusiveSharedPtr block); // Recomputes the block offset of the first block if needed. 
void RefreshFront(); void PushBack(IntrusiveSharedPtr block); void PushFront(IntrusiveSharedPtr block); IntrusiveSharedPtr PopBack(); IntrusiveSharedPtr PopFront(); // This template is defined and used only in chain.cc. template void AppendBlocks(const BlockPtr* begin, const BlockPtr* end); // This template is defined and used only in chain.cc. template void PrependBlocks(const BlockPtr* begin, const BlockPtr* end); void ReserveBack(size_t extra_capacity); void ReserveFront(size_t extra_capacity); void ReserveBackSlow(size_t extra_capacity); void ReserveFrontSlow(size_t extra_capacity); // Decides about the capacity of a new block to be appended/prepended. // // If `replaced_length > 0`, the block will replace an existing block of that // size. In addition to `replaced_length`, it requires the capacity of at // least `min_length`, preferably `recommended_length`. size_t NewBlockCapacity(size_t replaced_length, size_t min_length, size_t recommended_length, Options options) const; // This template is defined and used only in chain.cc. template void AppendChain(ChainRef&& src, Options options); // This template is defined and used only in chain.cc. template void PrependChain(ChainRef&& src, Options options); // This template is defined and used only in chain.cc. template void AppendRawBlock(RawBlockPtrRef&& block, Options options = Options()); // This template is defined and used only in chain.cc. template void PrependRawBlock(RawBlockPtrRef&& block, Options options = Options()); // This template is defined and used only in chain.cc. template void AppendCord(CordRef&& src, Options options); // This template is defined and used only in chain.cc. template void AppendCordSlow(CordRef&& src, Options options); // This template is defined and used only in chain.cc. 
template void PrependCord(CordRef&& src, Options options); void RegisterSubobjects(MemoryEstimator& memory_estimator) const; static StrongOrdering Compare(const Chain& a, const Chain& b); static StrongOrdering Compare(const Chain& a, absl::string_view b); template HashState HashValue(HashState hash_state) const; template void Stringify(Sink& dest) const; void Output(std::ostream& dest) const; template void Debug(DebugStream& dest) const; BlockPtrs block_ptrs_; // The range of the block pointers array which is actually used. // // Invariants: // `begin_ <= end_` // if `has_here()` then `begin_ == block_ptrs_.here` // and `end_ <= block_ptrs_.here + 2` // if `has_allocated()` then `begin_ >= block_ptrs_.allocated.begin` // and `end_ <= block_ptrs_.allocated.end` BlockPtr* begin_ = block_ptrs_.here; BlockPtr* end_ = block_ptrs_.here; // Invariants: // if `begin_ == end_` then `size_ <= kMaxShortDataSize` // if `begin_ == end_ && has_allocated()` then `size_ == 0` // if `begin_ != end_` then // `size_` is the sum of sizes of blocks in the range [`begin_`..`end_`) size_t size_ = 0; }; // Implementation details follow. // `Chain` representation consists of blocks. // // An internal block holds an allocated array which consists of free space // before data, data, and free space after data. Block size is the size of // data; block capacity is the size of the allocated array. // // An external block holds some object which keeps a data array alive, the // destructor of the object, and the address of the data array. // // Definitions: // - empty block: a block with size == 0 // - tiny block: a block with size < `kDefaultMinBlockSize` // - wasteful block: a block with free space > size + `kDefaultMinBlockSize` // // Invariants of a `Chain`: // - A block can be empty or wasteful only if it is the first or last block. // - Tiny blocks must not be adjacent. 
class Chain::RawBlock { public: static constexpr size_t kInternalAllocatedOffset(); static constexpr size_t kMaxCapacity = size_t{std::numeric_limits::max()}; // Creates an internal block. static IntrusiveSharedPtr NewInternal(size_t min_capacity); // Constructs an internal block. This constructor is public for // `SizeReturningNewAligned()`. explicit RawBlock(const size_t* raw_capacity); // Constructs an external block containing an external object of type `T`, // and sets block data to `BytesRef(new_object)`. This constructor is public // for `NewAligned()`. template explicit RawBlock(Initializer object); // Constructs an external block containing an external object of type `T`, and // sets block data to `data`. This constructor is public for `NewAligned()`. template explicit RawBlock(Initializer object, absl::string_view substr); // Allocated size of an external block containing an external object of type // `T`. template static constexpr size_t kExternalAllocatedSize(); template RawBlock* Ref(); template void Unref(); IntrusiveSharedPtr Copy(); bool TryClear(); /*implicit*/ operator absl::string_view() const { return substr_; } bool empty() const { return substr_.empty(); } size_t size() const { return substr_.size(); } const char* data_begin() const { return substr_.data(); } const char* data_end() const { return substr_.data() + substr_.size(); } // Returns a reference to the external object, assuming that this is an // external block holding an object of type `T`. template T& unchecked_external_object(); template const T& unchecked_external_object() const; // Returns a pointer to the external object if this is an external block // holding an object of type `T`, otherwise returns `nullptr`. template const T* checked_external_object() const; // Returns a pointer to the external object if this is an external block // holding an object of type `T` and the block has a unique owner, otherwise // returns `nullptr`. 
template T* checked_external_object_with_unique_owner(); bool tiny(size_t extra_size = 0) const; bool wasteful(size_t extra_size = 0) const; // Shows internal structure in a human-readable way, for debugging. void DumpStructure(std::ostream& dest) const; // Supports `MemoryEstimator`. friend size_t RiegeliDynamicSizeOf(const RawBlock* self) { return self->DynamicSizeOf(); } // Supports `MemoryEstimator`. friend void RiegeliRegisterSubobjects(const RawBlock* self, MemoryEstimator& memory_estimator) { self->RegisterSubobjects(memory_estimator); } bool can_append(size_t length) const; bool can_prepend(size_t length) const; absl::Span AppendBuffer(size_t max_length); absl::Span PrependBuffer(size_t max_length); void Append(absl::string_view src, size_t space_before = 0); // Reads `size_to_copy` from `src.data()` but accounts for `src.size()`. // Faster than `Append()` if `size_to_copy` is a compile time constant, but // requires `size_to_copy` bytes to be readable, possibly past the end of src. // // Precondition: `size_to_copy >= src.size()` void AppendWithExplicitSizeToCopy(absl::string_view src, size_t size_to_copy); void Prepend(absl::string_view src, size_t space_after = 0); bool TryRemoveSuffix(size_t length); bool TryRemovePrefix(size_t length); private: template friend struct ExternalMethodsFor; struct External { // Type-erased methods of the object. const ExternalMethods* methods; // Lowest possible beginning of the object (actual object has a different // type and can begin at a higher address due to alignment). 
char object_lower_bound[1]; }; template static constexpr size_t kExternalObjectOffset(); #if RIEGELI_DEBUG template , int> = 0> static void AssertSubstr(const T& object, absl::string_view substr) { if (!substr.empty()) { const BytesRef whole = object; RIEGELI_ASSERT(std::greater_equal<>()(substr.data(), whole.data())) << "Failed precondition of Chain::Block::Block(): " "substring not contained in whole data"; RIEGELI_ASSERT(std::less_equal<>()(substr.data() + substr.size(), whole.data() + whole.size())) << "Failed precondition of Chain::Block::Block(): " "substring not contained in whole data"; } } template < typename T, std::enable_if_t, int> = 0> #else template #endif static void AssertSubstr(ABSL_ATTRIBUTE_UNUSED const T& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) { } bool is_mutable() const { return is_internal() && has_unique_owner(); } bool has_unique_owner() const; bool is_internal() const { return allocated_end_ != nullptr; } bool is_external() const { return allocated_end_ == nullptr; } size_t capacity() const; size_t space_before() const; size_t space_after() const; size_t DynamicSizeOf() const; void RegisterSubobjects(MemoryEstimator& memory_estimator) const; RefCount ref_count_; absl::string_view substr_; // If `is_internal()`, end of allocated space. If `is_external()`, `nullptr`. // This distinguishes internal from external blocks. char* allocated_end_ = nullptr; union { // If `is_internal()`, beginning of data (actual allocated size is larger). char allocated_begin_[1]; // If `is_external()`, the remaining fields. External external_; }; }; // Represents a reference counted pointer to a single block of a `Chain`. class Chain::Block { public: // Creates an empty `Block`. Block() = default; // Given an object which owns a byte array, converts it to a `Block` by // attaching the object, avoiding copying the bytes. // // `ExternalRef` is a higher level mechanism which chooses between sharing the // object and copying the data. 
// // The `object` parameter supports `riegeli::Maker(args...)` to construct // `T` in-place. // // If the `substr` parameter is given, `substr` must be owned by the object // after it gets created or moved. // // If the `substr` parameter is not given, `T` must be convertible to // `BytesRef`. // // `T` may also support the following member functions, either with or without // the `substr` parameter, with the following definitions assumed by default: // ``` // // Called once before the destructor, except on a moved-from object. // // If only this function is needed, `T` can be a lambda. // void operator()(absl::string_view substr) && {} // // // Shows internal structure in a human-readable way, for debugging. // friend void RiegeliDumpStructure(const T* self, absl::string_view substr, // std::ostream& dest) { // out << "[external] { }"; // } // // // Registers this object with `MemoryEstimator`. // // // // By default calls `memory_estimator.RegisterUnknownType()` and // // as an approximation of memory usage of an unknown type, registers just // // the stored `substr` if unique. // friend void RiegeliRegisterSubobjects( // const T* self, riegeli::MemoryEstimator& memory_estimator); // ``` // // The `substr` parameter of these member functions, if present, will get the // `substr` parameter passed to `FromExternal()`. Having `substr` available in // these functions might avoid storing `substr` in the external object. 
template >, std::is_convertible, BytesRef>>, int> = 0> explicit Block(T&& object); template explicit Block(T&& object, absl::string_view substr); Block(const Block& that) = default; Block& operator=(const Block& that) = default; Block(Block&& that) = default; Block& operator=(Block&& that) = default; /*implicit*/ operator absl::string_view() const ABSL_ATTRIBUTE_LIFETIME_BOUND { if (block_ == nullptr) return absl::string_view(); return *block_; } bool empty() const { return block_ == nullptr || block_->empty(); } size_t size() const { if (block_ == nullptr) return 0; return block_->size(); } const char* data() const ABSL_ATTRIBUTE_LIFETIME_BOUND { if (block_ == nullptr) return nullptr; return block_->data_begin(); } // Indicates support for: // * `ExternalRef(const Block&)` // * `ExternalRef(Block&&)` // * `ExternalRef(const Block&, substr)` // * `ExternalRef(Block&&, substr)` friend void RiegeliSupportsExternalRef(const Block*) {} // Supports `ExternalRef`. friend Block RiegeliToChainBlock(Block* self, absl::string_view substr) { return std::move(*self).ToChainBlock(substr); } // Supports `ExternalRef`. friend absl::Cord RiegeliToCord(Block* self, absl::string_view substr) { return std::move(*self).ToCord(substr); } friend absl::Cord RiegeliToCord(const Block* self, absl::string_view substr) { return self->ToCord(substr); } // Supports `ExternalRef`. friend ExternalStorage RiegeliToExternalStorage(Block* self) { return std::move(*self).ToExternalStorage(); } // Supports `ExternalRef` and `Chain::Block`. friend void RiegeliDumpStructure(const Block* self, absl::string_view substr, std::ostream& dest) { self->DumpStructure(substr, dest); } // Supports `MemoryEstimator`. template friend void RiegeliRegisterSubobjects(const Block* self, MemoryEstimator& memory_estimator) { memory_estimator.RegisterSubobjects(&self->block_); } private: friend class Chain; // For `Block()` and `raw_block()`. 
explicit Block(RawBlock* block); explicit Block(RawBlock* block, absl::string_view substr); explicit Block(IntrusiveSharedPtr block); const IntrusiveSharedPtr& raw_block() const& { return block_; } IntrusiveSharedPtr&& raw_block() && { return std::move(block_); } Block ToChainBlock(absl::string_view substr) &&; absl::Cord ToCord(absl::string_view substr) &&; absl::Cord ToCord(absl::string_view substr) const&; ExternalStorage ToExternalStorage() &&; void DumpStructure(absl::string_view substr, std::ostream& dest) const; IntrusiveSharedPtr block_; }; } // namespace riegeli #endif // RIEGELI_BASE_CHAIN_BASE_H_ ================================================ FILE: riegeli/base/chain_details.h ================================================ // Copyright 2017 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_CHAIN_DETAILS_H_ #define RIEGELI_BASE_CHAIN_DETAILS_H_ // IWYU pragma: private, include "riegeli/base/chain.h" #include #include #include #include #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/optimization.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/bytes_ref.h" #include "riegeli/base/chain_base.h" #include "riegeli/base/compare.h" #include "riegeli/base/external_data.h" #include "riegeli/base/external_ref_base.h" #include "riegeli/base/external_ref_support.h" #include "riegeli/base/initializer.h" #include "riegeli/base/intrusive_shared_ptr.h" #include "riegeli/base/iterable.h" #include "riegeli/base/memory_estimator.h" #include "riegeli/base/new_aligned.h" #include "riegeli/base/ownership.h" #include "riegeli/base/type_traits.h" namespace riegeli { // Represents either `const BlockPtr*`, or one of two special values // (`kBeginShortData` and `kEndShortData`) behaving as if they were pointers in // a single-element `BlockPtr` array. 
class Chain::BlockPtrPtr : public WithCompare<BlockPtrPtr> {
 public:
  explicit constexpr BlockPtrPtr(uintptr_t repr) : repr_(repr) {}
  static BlockPtrPtr from_ptr(const BlockPtr* ptr);

  bool is_special() const;
  const BlockPtr* as_ptr() const;

  BlockPtrPtr operator+(ptrdiff_t n) const;
  BlockPtrPtr operator-(ptrdiff_t n) const;
  friend ptrdiff_t operator-(BlockPtrPtr a, BlockPtrPtr b) {
    return Subtract(a, b);
  }

  friend bool operator==(BlockPtrPtr a, BlockPtrPtr b) {
    return a.repr_ == b.repr_;
  }
  friend StrongOrdering RIEGELI_COMPARE(BlockPtrPtr a, BlockPtrPtr b) {
    RIEGELI_ASSERT_EQ(a.is_special(), b.is_special())
        << "Incompatible BlockPtrPtr values";
    if (a.is_special()) return riegeli::Compare(a.repr_, b.repr_);
    return riegeli::Compare(a.as_ptr(), b.as_ptr());
  }

 private:
  // `operator-` body is defined in a member function to gain access to
  // private `Chain::RawBlock` under gcc.
  static ptrdiff_t Subtract(BlockPtrPtr a, BlockPtrPtr b) {
    RIEGELI_ASSERT_EQ(a.is_special(), b.is_special())
        << "Incompatible BlockPtrPtr values";
    if (a.is_special()) {
      const ptrdiff_t byte_diff = static_cast<ptrdiff_t>(a.repr_) -
                                  static_cast<ptrdiff_t>(b.repr_);
      // Pointer subtraction with the element size being a power of 2
      // typically rounds in the same way as right shift (towards -inf), not
      // as division (towards zero), so the right shift allows the compiler to
      // eliminate the `is_special()` check.
      switch (sizeof(RawBlock*)) {
        case 1 << 2:
          return byte_diff >> 2;
        case 1 << 3:
          return byte_diff >> 3;
        default:
          return byte_diff / ptrdiff_t{sizeof(RawBlock*)};
      }
    }
    return a.as_ptr() - b.as_ptr();
  }

  uintptr_t repr_;
};

// Access private constructors of `Chain::Block`.
struct Chain::MakeBlock { Block operator()(IntrusiveSharedPtr block) const { return Block(std::move(block)); } Block operator()(RawBlock* block) const { return Block(block); } }; class Chain::BlockRef { public: BlockRef(const BlockRef& that) = default; BlockRef& operator=(const BlockRef& that) = default; /*implicit*/ operator absl::string_view() const; bool empty() const; const char* data() const; size_t size() const; // Indicates support for: // * `ExternalRef(BlockRef)` // * `ExternalRef(BlockRef, substr)` friend void RiegeliSupportsExternalRef(const BlockRef*) {} // Supports `ExternalRef`. friend bool RiegeliExternalCopy(const BlockRef* self) { return self->ExternalCopy(); } // Supports `ExternalRef`. friend Chain::Block RiegeliToChainBlock(const BlockRef* self, absl::string_view substr) { return self->ToChainBlock(substr); } // Supports `ExternalRef`. template friend void RiegeliExternalDelegate(const BlockRef* self, absl::string_view substr, Callback&& delegate_to) { self->ExternalDelegate(substr, std::forward(delegate_to)); } // Returns a pointer to the external object if this is an external block // holding an object of type `T`, otherwise returns `nullptr`. template const T* external_object() const; private: friend class Chain; // For `BlockRef()`. explicit BlockRef(const Chain* chain, BlockPtrPtr ptr) : chain_(chain), ptr_(ptr) {} bool ExternalCopy() const; Chain::Block ToChainBlock(absl::string_view substr) const; template void ExternalDelegate(absl::string_view substr, Callback&& delegate_to) const; const Chain* chain_; // If `*chain_` has short data, `kBeginShortData`. // If `*chain_` has block pointers, a pointer to an element of the block // pointer array. 
BlockPtrPtr ptr_; }; class Chain::BlockIterator : public WithCompare { public: using iterator_concept = std::random_access_iterator_tag; // `iterator_category` is only `std::input_iterator_tag` because the // `LegacyForwardIterator` requirement and above require `reference` to be // a true reference type. using iterator_category = std::input_iterator_tag; using value_type = BlockRef; using reference = value_type; using pointer = ArrowProxy; using difference_type = ptrdiff_t; BlockIterator() = default; explicit BlockIterator(const Chain* chain ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t block_index); BlockIterator(const BlockIterator& that) = default; BlockIterator& operator=(const BlockIterator& that) = default; const Chain* chain() const { return chain_; } size_t block_index() const; // Returns the char index relative to the beginning of the chain, given the // corresponding char index relative to the beginning of the block. // // The opposite conversion is `Chain::BlockAndCharIndex()`. size_t CharIndexInChain(size_t char_index_in_block = 0) const; reference operator*() const; pointer operator->() const; BlockIterator& operator++(); BlockIterator operator++(int); BlockIterator& operator--(); BlockIterator operator--(int); BlockIterator& operator+=(difference_type n); BlockIterator operator+(difference_type n) const; BlockIterator& operator-=(difference_type n); BlockIterator operator-(difference_type n) const; reference operator[](difference_type n) const; friend bool operator==(BlockIterator a, BlockIterator b) { RIEGELI_ASSERT_EQ(a.chain_, b.chain_) << "Failed precondition of operator==(Chain::BlockIterator): " "incomparable iterators"; return a.ptr_ == b.ptr_; } friend StrongOrdering RIEGELI_COMPARE(BlockIterator a, BlockIterator b) { RIEGELI_ASSERT_EQ(a.chain_, b.chain_) << "Failed precondition of operator<=>(Chain::BlockIterator): " "incomparable iterators"; return riegeli::Compare(a.ptr_, b.ptr_); } friend difference_type operator-(BlockIterator a, BlockIterator 
b) { RIEGELI_ASSERT_EQ(a.chain_, b.chain_) << "Failed precondition of operator-(Chain::BlockIterator): " "incomparable iterators"; return a.ptr_ - b.ptr_; } friend BlockIterator operator+(difference_type n, BlockIterator a) { return a + n; } private: friend class Chain; static constexpr BlockPtrPtr kBeginShortData{0}; static constexpr BlockPtrPtr kEndShortData{sizeof(BlockPtr)}; explicit BlockIterator(const Chain* chain, BlockPtrPtr ptr); size_t CharIndexInChainInternal() const; const Chain* chain_ = nullptr; // If `chain_ == nullptr`, `kBeginShortData`. // If `*chain_` has no block pointers and no short data, `kEndShortData`. // If `*chain_` has short data, `kBeginShortData` or `kEndShortData`. // If `*chain_` has block pointers, a pointer to an element of the block // pointer array. BlockPtrPtr ptr_ = kBeginShortData; }; class Chain::Blocks { public: using value_type = BlockRef; using reference = value_type; using const_reference = reference; using iterator = BlockIterator; using const_iterator = iterator; using reverse_iterator = std::reverse_iterator; using const_reverse_iterator = reverse_iterator; using size_type = size_t; using difference_type = ptrdiff_t; Blocks() = default; Blocks(const Blocks& that) = default; Blocks& operator=(const Blocks& that) = default; iterator begin() const; iterator cbegin() const { return begin(); } iterator end() const; iterator cend() const { return end(); } reverse_iterator rbegin() const { return reverse_iterator(end()); } reverse_iterator crbegin() const { return rbegin(); } reverse_iterator rend() const { return reverse_iterator(begin()); } reverse_iterator crend() const { return rend(); } bool empty() const; size_type size() const; reference operator[](size_type n) const; reference at(size_type n) const; reference front() const; reference back() const; private: friend class Chain; explicit Blocks(const Chain* chain) noexcept : chain_(chain) {} const Chain* chain_ = nullptr; }; // Represents the position of a character in a 
`Chain`. // // A `CharIterator` is not provided because it is more efficient to iterate by // blocks and process character ranges within a block. struct Chain::BlockAndChar { // Intended invariant: // if `block_iter == block_iter.chain()->blocks().cend()` // then `char_index == 0` // else `char_index < block_iter->size()` BlockIterator block_iter; size_t char_index; }; // Implementation details follow. struct Chain::ExternalMethods { void (*delete_block)(RawBlock* block); void (*dump_structure)(const RawBlock& block, std::ostream& dest); size_t dynamic_sizeof; void (*register_subobjects)(const RawBlock* block, MemoryEstimator& memory_estimator); }; namespace chain_internal { template struct HasCallOperatorSubstr : std::false_type {}; template struct HasCallOperatorSubstr()( std::declval()))>> : std::true_type {}; template struct HasCallOperatorWhole : std::false_type {}; template struct HasCallOperatorWhole()())>> : std::true_type {}; template struct HasCallOperator : std::disjunction, HasCallOperatorWhole> {}; template ::value, int> = 0> inline void CallOperator(T&& object, absl::string_view substr) { std::forward(object)(substr); } template < typename T, std::enable_if_t>, HasCallOperatorWhole>, int> = 0> inline void CallOperator(T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) { std::forward(object)(); } template < typename T, std::enable_if_t>, std::negation>>, int> = 0> inline void CallOperator(ABSL_ATTRIBUTE_UNUSED T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) {} template ::value, int> = 0> inline void RegisterSubobjects(const T* object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr, MemoryEstimator& memory_estimator) { memory_estimator.RegisterSubobjects(object); } template ::value, int> = 0> inline void RegisterSubobjects(ABSL_ATTRIBUTE_UNUSED const T* object, absl::string_view substr, MemoryEstimator& memory_estimator) { memory_estimator.RegisterUnknownType(); // As an approximation of memory usage of an unknown type, register 
just the // stored data if unique. if (memory_estimator.RegisterNode(substr.data())) { memory_estimator.RegisterDynamicMemory(substr.size()); } } template struct HasRiegeliDumpStructureWithSubstr : std::false_type {}; template struct HasRiegeliDumpStructureWithSubstr< T, std::void_t(), std::declval(), std::declval()))>> : std::true_type {}; template struct HasRiegeliDumpStructureWithoutData : std::false_type {}; template struct HasRiegeliDumpStructureWithoutData< T, std::void_t(), std::declval()))>> : std::true_type {}; void DumpStructureDefault(std::ostream& dest); template ::value, int> = 0> inline void DumpStructure(const T* object, absl::string_view substr, std::ostream& dest) { RiegeliDumpStructure(object, substr, dest); } template < typename T, std::enable_if_t< std::conjunction_v>, HasRiegeliDumpStructureWithoutData>, int> = 0> inline void DumpStructure(const T* object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr, std::ostream& dest) { RiegeliDumpStructure(object, dest); } template < typename T, std::enable_if_t>, std::negation>>, int> = 0> inline void DumpStructure(ABSL_ATTRIBUTE_UNUSED const T* object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr, std::ostream& dest) { chain_internal::DumpStructureDefault(dest); } } // namespace chain_internal // Supports `ExternalRef` and `Chain::Block`. void RiegeliDumpStructure(const std::string* self, std::ostream& dest); template struct Chain::ExternalMethodsFor { // Creates an external block containing an external object constructed from // `object`, and sets block data to `BytesRef(new_object)`. static IntrusiveSharedPtr NewBlock(Initializer object); // Creates an external block containing an external object constructed from // `object`, and sets block data to `data`. 
static IntrusiveSharedPtr NewBlock(Initializer object, absl::string_view substr); private: static void DeleteBlock(RawBlock* block); static void DumpStructure(const RawBlock& block, std::ostream& dest); static constexpr size_t DynamicSizeOf(); static void RegisterSubobjects(const RawBlock* block, MemoryEstimator& memory_estimator); public: static constexpr ExternalMethods kMethods = { DeleteBlock, DumpStructure, DynamicSizeOf(), RegisterSubobjects}; }; template inline IntrusiveSharedPtr Chain::ExternalMethodsFor::NewBlock(Initializer object) { return IntrusiveSharedPtr( NewAligned( RawBlock::kExternalAllocatedSize(), std::move(object))); } template inline IntrusiveSharedPtr Chain::ExternalMethodsFor::NewBlock(Initializer object, absl::string_view substr) { return IntrusiveSharedPtr( NewAligned( RawBlock::kExternalAllocatedSize(), std::move(object), substr)); } template void Chain::ExternalMethodsFor::DeleteBlock(RawBlock* block) { chain_internal::CallOperator(std::move(block->unchecked_external_object()), *block); block->unchecked_external_object().~T(); DeleteAligned( block, RawBlock::kExternalAllocatedSize()); } template void Chain::ExternalMethodsFor::DumpStructure(const RawBlock& block, std::ostream& dest) { chain_internal::DumpStructure(&block.unchecked_external_object(), block, dest); } template constexpr size_t Chain::ExternalMethodsFor::DynamicSizeOf() { return RawBlock::kExternalAllocatedSize(); } template void Chain::ExternalMethodsFor::RegisterSubobjects( const RawBlock* block, MemoryEstimator& memory_estimator) { chain_internal::RegisterSubobjects(&block->unchecked_external_object(), *block, memory_estimator); } template inline Chain::RawBlock::RawBlock(Initializer object) { external_.methods = &ExternalMethodsFor::kMethods; new (&unchecked_external_object()) T(std::move(object)); substr_ = BytesRef(unchecked_external_object()); RIEGELI_ASSERT(is_external()) << "A RawBlock with allocated_end_ == nullptr " "should be considered external"; } template 
inline Chain::RawBlock::RawBlock(Initializer object, absl::string_view substr) : substr_(substr) { external_.methods = &ExternalMethodsFor::kMethods; new (&unchecked_external_object()) T(std::move(object)); RIEGELI_ASSERT(is_external()) << "A RawBlock with allocated_end_ == nullptr " "should be considered external"; AssertSubstr(unchecked_external_object(), substr); } constexpr size_t Chain::RawBlock::kInternalAllocatedOffset() { return offsetof(RawBlock, allocated_begin_); } template constexpr size_t Chain::RawBlock::kExternalObjectOffset() { return RoundUp(offsetof(RawBlock, external_) + offsetof(External, object_lower_bound)); } template constexpr size_t Chain::RawBlock::kExternalAllocatedSize() { return kExternalObjectOffset() + sizeof(T); } template inline Chain::RawBlock* Chain::RawBlock::Ref() { ref_count_.Ref(); return this; } template inline void Chain::RawBlock::Unref() { if (ref_count_.Unref()) { if (is_internal()) { DeleteAligned(this, kInternalAllocatedOffset() + capacity()); } else { external_.methods->delete_block(this); } } } inline bool Chain::RawBlock::has_unique_owner() const { return ref_count_.HasUniqueOwner(); } inline size_t Chain::RawBlock::capacity() const { RIEGELI_ASSERT(is_internal()) << "Failed precondition of Chain::RawBlock::capacity(): " "block not internal"; return PtrDistance(allocated_begin_, allocated_end_); } template inline T& Chain::RawBlock::unchecked_external_object() { RIEGELI_ASSERT(is_external()) << "Failed precondition of Chain::RawBlock::unchecked_external_object(): " << "block not external"; return *std::launder(reinterpret_cast(reinterpret_cast(this) + kExternalObjectOffset())); } template inline const T& Chain::RawBlock::unchecked_external_object() const { RIEGELI_ASSERT(is_external()) << "Failed precondition of Chain::RawBlock::unchecked_external_object(): " << "block not external"; return *std::launder(reinterpret_cast( reinterpret_cast(this) + kExternalObjectOffset())); } template inline const T* 
Chain::RawBlock::checked_external_object() const { return is_external() && external_.methods == &ExternalMethodsFor::kMethods ? &unchecked_external_object() : nullptr; } template inline T* Chain::RawBlock::checked_external_object_with_unique_owner() { return is_external() && external_.methods == &ExternalMethodsFor::kMethods && has_unique_owner() ? &unchecked_external_object() : nullptr; } inline bool Chain::RawBlock::TryClear() { if (is_mutable()) { substr_ = substr_.substr(0, 0); return true; } return false; } inline bool Chain::RawBlock::TryRemoveSuffix(size_t length) { RIEGELI_ASSERT_LE(length, size()) << "Failed precondition of Chain::RawBlock::TryRemoveSuffix(): " << "length to remove greater than current size"; if (is_mutable()) { substr_.remove_suffix(length); return true; } return false; } inline bool Chain::RawBlock::TryRemovePrefix(size_t length) { RIEGELI_ASSERT_LE(length, size()) << "Failed precondition of Chain::RawBlock::TryRemovePrefix(): " << "length to remove greater than current size"; if (is_mutable()) { substr_.remove_prefix(length); return true; } return false; } inline Chain::BlockPtrPtr Chain::BlockPtrPtr::from_ptr(const BlockPtr* ptr) { return BlockPtrPtr(reinterpret_cast(ptr)); } inline bool Chain::BlockPtrPtr::is_special() const { return repr_ <= sizeof(BlockPtr); } inline const Chain::BlockPtr* Chain::BlockPtrPtr::as_ptr() const { RIEGELI_ASSERT(!is_special()) << "Unexpected special BlockPtrPtr value"; return reinterpret_cast(repr_); } // Code conditional on `is_special()` is written such that both branches // typically compile to the same code, allowing the compiler eliminate the // `is_special()` checks. 
inline Chain::BlockPtrPtr Chain::BlockPtrPtr::operator+(ptrdiff_t n) const { if (is_special()) { return BlockPtrPtr(IntCast(IntCast(repr_) + n * ptrdiff_t{sizeof(RawBlock*)})); } return BlockPtrPtr::from_ptr(as_ptr() + n); } inline Chain::BlockPtrPtr Chain::BlockPtrPtr::operator-(ptrdiff_t n) const { if (is_special()) { return BlockPtrPtr(IntCast(IntCast(repr_) - n * ptrdiff_t{sizeof(RawBlock*)})); } return BlockPtrPtr::from_ptr(as_ptr() - n); } inline Chain::BlockRef::operator absl::string_view() const { if (ptr_ == BlockIterator::kBeginShortData) { return chain_->short_data(); } else { return *ptr_.as_ptr()->block_ptr; } } inline bool Chain::BlockRef::empty() const { return ptr_ != BlockIterator::kBeginShortData && ptr_.as_ptr()->block_ptr->empty(); } inline const char* Chain::BlockRef::data() const { if (ptr_ == BlockIterator::kBeginShortData) { return chain_->short_data_begin(); } else { return ptr_.as_ptr()->block_ptr->data_begin(); } } inline size_t Chain::BlockRef::size() const { if (ptr_ == BlockIterator::kBeginShortData) { return chain_->size_; } else { return ptr_.as_ptr()->block_ptr->size(); } } inline bool Chain::BlockRef::ExternalCopy() const { return ptr_ == BlockIterator::kBeginShortData; } inline Chain::Block Chain::BlockRef::ToChainBlock( absl::string_view substr) const { RIEGELI_ASSERT(ptr_ != BlockIterator::kBeginShortData) << "Failed precondition of RiegeliToChainBlock(const Chain::BlockRef*): " "case excluded by RiegeliExternalCopy()"; return Block(ptr_.as_ptr()->block_ptr, substr); } template inline void Chain::BlockRef::ExternalDelegate(absl::string_view substr, Callback&& delegate_to) const { RIEGELI_ASSERT(ptr_ != BlockIterator::kBeginShortData) << "Failed precondition of " "RiegeliExternalDelegate(const Chain::BlockRef*): " "case excluded by RiegeliExternalCopy()"; std::forward(delegate_to)(Block(ptr_.as_ptr()->block_ptr), substr); } template inline const T* Chain::BlockRef::external_object() const { if (ptr_ == 
BlockIterator::kBeginShortData) { return nullptr; } else { return ptr_.as_ptr()->block_ptr->checked_external_object(); } } inline Chain::BlockIterator::BlockIterator( const Chain* chain ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t block_index) : chain_(chain), ptr_((ABSL_PREDICT_FALSE(chain_ == nullptr) ? kBeginShortData : chain_->begin_ == chain_->end_ ? (chain_->empty() ? kEndShortData : kBeginShortData) : BlockPtrPtr::from_ptr(chain_->begin_)) + IntCast(block_index)) {} inline Chain::BlockIterator::BlockIterator(const Chain* chain, BlockPtrPtr ptr) : chain_(chain), ptr_(ptr) {} inline size_t Chain::BlockIterator::block_index() const { if (ptr_ == kBeginShortData) { return 0; } else if (ptr_ == kEndShortData) { return chain_->empty() ? 0 : 1; } else { return PtrDistance(chain_->begin_, ptr_.as_ptr()); } } inline size_t Chain::BlockIterator::CharIndexInChain( size_t char_index_in_block) const { return CharIndexInChainInternal() + char_index_in_block; } inline Chain::BlockIterator::reference Chain::BlockIterator::operator*() const { RIEGELI_ASSERT(ptr_ != kEndShortData) << "Failed precondition of Chain::BlockIterator::operator*: " "iterator is end()"; return BlockRef(chain_, ptr_); } inline Chain::BlockIterator::pointer Chain::BlockIterator::operator->() const { return pointer(**this); } inline Chain::BlockIterator& Chain::BlockIterator::operator++() { ptr_ = ptr_ + 1; return *this; } inline Chain::BlockIterator Chain::BlockIterator::operator++(int) { const BlockIterator tmp = *this; ++*this; return tmp; } inline Chain::BlockIterator& Chain::BlockIterator::operator--() { ptr_ = ptr_ - 1; return *this; } inline Chain::BlockIterator Chain::BlockIterator::operator--(int) { const BlockIterator tmp = *this; --*this; return tmp; } inline Chain::BlockIterator& Chain::BlockIterator::operator+=( difference_type n) { ptr_ = ptr_ + n; return *this; } inline Chain::BlockIterator Chain::BlockIterator::operator+( difference_type n) const { return BlockIterator(*this) += n; } inline 
Chain::BlockIterator& Chain::BlockIterator::operator-=( difference_type n) { ptr_ = ptr_ - n; return *this; } inline Chain::BlockIterator Chain::BlockIterator::operator-( difference_type n) const { return BlockIterator(*this) -= n; } inline Chain::BlockIterator::reference Chain::BlockIterator::operator[]( difference_type n) const { return *(*this + n); } template >, std::is_convertible, BytesRef>>, int>> inline Chain::Block::Block(T&& object) : block_( ExternalMethodsFor>::NewBlock(std::forward(object))) {} template inline Chain::Block::Block(T&& object, absl::string_view substr) : block_(ExternalMethodsFor>::NewBlock(std::forward(object), substr)) {} inline Chain::Block::Block(RawBlock* block, absl::string_view substr) { if (block->size() == substr.size()) { block_.Reset(block, kShareOwnership); return; } if (const Block* const block_ptr = block->checked_external_object()) { // `block` is already a `Block`. Refer to its target instead. block = block_ptr->block_.get(); } block_.Reset(block, kShareOwnership); block_ = ExternalMethodsFor::NewBlock(std::move(*this), substr); } inline Chain::Block::Block(RawBlock* block) { if (const Block* const block_ptr = block->checked_external_object()) { // `block` is already a `Block`. Refer to its target instead. block = block_ptr->block_.get(); } block_.Reset(block, kShareOwnership); } inline Chain::Block::Block(IntrusiveSharedPtr block) { if (const Block* const block_ptr = block->checked_external_object()) { // `block` is already a `Block`. Refer to its target instead. block = block_ptr->block_; } block_ = std::move(block); } inline ExternalStorage Chain::Block::ToExternalStorage() && { return ExternalStorage(block_.Release(), [](void* ptr) { static_cast(ptr)->Unref(); }); } inline Chain::Blocks::iterator Chain::Blocks::begin() const { return BlockIterator(chain_, chain_->begin_ == chain_->end_ ? (chain_->empty() ? 
BlockIterator::kEndShortData : BlockIterator::kBeginShortData) : BlockPtrPtr::from_ptr(chain_->begin_)); } inline Chain::Blocks::iterator Chain::Blocks::end() const { return BlockIterator(chain_, chain_->begin_ == chain_->end_ ? BlockIterator::kEndShortData : BlockPtrPtr::from_ptr(chain_->end_)); } inline Chain::Blocks::size_type Chain::Blocks::size() const { if (chain_->begin_ == chain_->end_) { return chain_->empty() ? 0 : 1; } else { return PtrDistance(chain_->begin_, chain_->end_); } } inline bool Chain::Blocks::empty() const { return chain_->begin_ == chain_->end_ && chain_->empty(); } inline Chain::Blocks::reference Chain::Blocks::operator[](size_type n) const { RIEGELI_ASSERT_LT(n, size()) << "Failed precondition of Chain::Blocks::operator[]: " "block index out of range"; return BlockRef(chain_, chain_->begin_ == chain_->end_ ? BlockIterator::kBeginShortData : BlockPtrPtr::from_ptr(chain_->begin_ + n)); } inline Chain::Blocks::reference Chain::Blocks::at(size_type n) const { RIEGELI_CHECK_LT(n, size()) << "Failed precondition of Chain::Blocks::at(): " "block index out of range"; return BlockRef(chain_, chain_->begin_ == chain_->end_ ? BlockIterator::kBeginShortData : BlockPtrPtr::from_ptr(chain_->begin_ + n)); } inline Chain::Blocks::reference Chain::Blocks::front() const { RIEGELI_ASSERT(!empty()) << "Failed precondition of Chain::Blocks::front(): no blocks"; return BlockRef(chain_, chain_->begin_ == chain_->end_ ? BlockIterator::kBeginShortData : BlockPtrPtr::from_ptr(chain_->begin_)); } inline Chain::Blocks::reference Chain::Blocks::back() const { RIEGELI_ASSERT(!empty()) << "Failed precondition of Chain::Blocks::back(): no blocks"; return BlockRef(chain_, chain_->begin_ == chain_->end_ ? 
BlockIterator::kBeginShortData : BlockPtrPtr::from_ptr(chain_->end_ - 1)); } template constexpr size_t Chain::kExternalAllocatedSize() { return RawBlock::kExternalAllocatedSize(); } inline Chain::Chain(BytesRef src) { Initialize(src); } inline Chain::Chain(ExternalRef src) { std::move(src).InitializeTo(*this); } template ::value, int>> inline Chain::Chain(Src&& src) { ExternalRef(std::forward(src)).InitializeTo(*this); } inline Chain::Chain(Block src) { if (src.raw_block() != nullptr) Initialize(std::move(src)); } inline Chain::Chain(Chain&& that) noexcept : size_(std::exchange(that.size_, 0)) { // Use `std::memcpy()` instead of copy constructor to silence // `-Wmaybe-uninitialized` in gcc. std::memcpy(&block_ptrs_, &that.block_ptrs_, sizeof(BlockPtrs)); if (that.has_here()) { // `that.has_here()` implies that `that.begin_ == that.block_ptrs_.here` // already. begin_ = block_ptrs_.here; end_ = block_ptrs_.here + (std::exchange(that.end_, that.block_ptrs_.here) - that.block_ptrs_.here); } else { begin_ = std::exchange(that.begin_, that.block_ptrs_.here); end_ = std::exchange(that.end_, that.block_ptrs_.here); } // It does not matter what is left in `that.block_ptrs_` because `that.begin_` // and `that.end_` point to the empty prefix of `that.block_ptrs_.here[]`. } inline Chain& Chain::operator=(Chain&& that) noexcept { // Exchange `that.begin_` and `that.end_` early to support self-assignment. BlockPtr* begin; BlockPtr* end; if (that.has_here()) { // `that.has_here()` implies that `that.begin_ == that.block_ptrs_.here` // already. begin = block_ptrs_.here; end = block_ptrs_.here + (std::exchange(that.end_, that.block_ptrs_.here) - that.block_ptrs_.here); } else { begin = std::exchange(that.begin_, that.block_ptrs_.here); end = std::exchange(that.end_, that.block_ptrs_.here); } UnrefBlocks(); DeleteBlockPtrs(); // It does not matter what is left in `that.block_ptrs_` because `that.begin_` // and `that.end_` point to the empty prefix of `that.block_ptrs_.here[]`. 
Use // `std::memcpy()` instead of assignment to silence `-Wmaybe-uninitialized` in // gcc. std::memcpy(&block_ptrs_, &that.block_ptrs_, sizeof(BlockPtrs)); begin_ = begin; end_ = end; size_ = std::exchange(that.size_, 0); return *this; } inline Chain::~Chain() { UnrefBlocks(); DeleteBlockPtrs(); } inline void Chain::Reset() { Clear(); } inline void Chain::Reset(ExternalRef src) { std::move(src).AssignTo(*this); } template ::value, int>> inline void Chain::Reset(Src&& src) { ExternalRef(std::forward(src)).AssignTo(*this); } inline void Chain::Clear() { size_ = 0; if (begin_ != end_) ClearSlow(); } inline void Chain::Initialize(absl::string_view src) { RIEGELI_ASSERT_EQ(size_, 0u) << "Failed precondition of Chain::Initialize(string_view): " "size not reset"; if (src.size() <= kMaxShortDataSize) { if (src.empty()) return; EnsureHasHere(); size_ = src.size(); std::memcpy(short_data_begin(), src.data(), src.size()); return; } InitializeSlow(src); } inline void Chain::Initialize(Block src) { size_ = src.raw_block()->size(); (end_++)->block_ptr = std::move(src).raw_block().Release(); } inline absl::string_view Chain::short_data() const { return absl::string_view(short_data_begin(), size_); } inline char* Chain::short_data_begin() { RIEGELI_ASSERT_EQ(begin_, end_) << "Failed precondition of Chain::short_data_begin(): blocks exist"; RIEGELI_ASSERT(empty() || has_here()) << "Failed precondition of Chain::short_data_begin(): " "block pointer array is allocated"; return block_ptrs_.short_data; } inline const char* Chain::short_data_begin() const { RIEGELI_ASSERT_EQ(begin_, end_) << "Failed precondition of Chain::short_data_begin(): blocks exist"; RIEGELI_ASSERT(empty() || has_here()) << "Failed precondition of Chain::short_data_begin(): " "block pointer array is allocated"; return block_ptrs_.short_data; } inline void Chain::DeleteBlockPtrs() { if (has_allocated()) { std::allocator().deallocate( block_ptrs_.allocated.begin, 2 * PtrDistance(block_ptrs_.allocated.begin, 
block_ptrs_.allocated.end)); } } inline void Chain::EnsureHasHere() { RIEGELI_ASSERT_EQ(begin_, end_) << "Failed precondition of Chain::EnsureHasHere(): blocks exist"; if (ABSL_PREDICT_FALSE(has_allocated())) { DeleteBlockPtrs(); begin_ = block_ptrs_.here; end_ = block_ptrs_.here; } } inline void Chain::UnrefBlocks() { UnrefBlocks(begin_, end_); } inline void Chain::UnrefBlocks(const BlockPtr* begin, const BlockPtr* end) { if (begin != end) UnrefBlocksSlow(begin, end); } inline Chain::Blocks Chain::blocks() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return Blocks(this); } inline std::optional Chain::TryFlat() const ABSL_ATTRIBUTE_LIFETIME_BOUND { switch (end_ - begin_) { case 0: return short_data(); case 1: return *front(); default: return std::nullopt; } } inline absl::string_view Chain::Flatten() ABSL_ATTRIBUTE_LIFETIME_BOUND { switch (end_ - begin_) { case 0: return short_data(); case 1: return *front(); default: return FlattenSlow(); } } inline absl::Span Chain::AppendFixedBuffer(size_t length, Options options) ABSL_ATTRIBUTE_LIFETIME_BOUND { return AppendBuffer(length, length, length, options); } inline absl::Span Chain::PrependFixedBuffer( size_t length, Options options) ABSL_ATTRIBUTE_LIFETIME_BOUND { return PrependBuffer(length, length, length, options); } inline void Chain::Append(ExternalRef src) { std::move(src).AppendTo(*this); } inline void Chain::Append(ExternalRef src, Options options) { std::move(src).AppendTo(*this, options); } inline void Chain::Prepend(ExternalRef src) { std::move(src).PrependTo(*this); } inline void Chain::Prepend(ExternalRef src, Options options) { std::move(src).PrependTo(*this, options); } template ::value, int>> inline void Chain::Append(Src&& src) { ExternalRef(std::forward(src)).AppendTo(*this); } template ::value, int>> inline void Chain::Append(Src&& src, Options options) { ExternalRef(std::forward(src)).AppendTo(*this, options); } template ::value, int>> inline void Chain::Prepend(Src&& src) { 
ExternalRef(std::forward(src)).PrependTo(*this); } template ::value, int>> inline void Chain::Prepend(Src&& src, Options options) { ExternalRef(std::forward(src)).PrependTo(*this, options); } template HashState Chain::HashValue(HashState hash_state) const { if (empty()) return HashState::combine(std::move(hash_state), size_t{0}); RIEGELI_ASSERT(!blocks().empty()); constexpr size_t kChunkSize = 256; char chunk[kChunkSize]; // Hash chunks of size `kChunkSize` using `HashState::combine_contiguous()`. // The last chunk can be smaller; no chunk is empty. Then combine the size. size_t position = 0; for (size_t block_index = 0; block_index < blocks().size() - 1; ++block_index) { absl::string_view block = blocks()[block_index]; if (block.size() < kChunkSize - position) { std::memcpy(chunk + position, block.data(), block.size()); position += block.size(); continue; } if (position > 0) { const size_t remaining = kChunkSize - position; std::memcpy(chunk + position, block.data(), remaining); hash_state = HashState::combine_contiguous(std::move(hash_state), chunk, kChunkSize); block.remove_prefix(remaining); } while (block.size() >= kChunkSize) { hash_state = HashState::combine_contiguous(std::move(hash_state), block.data(), kChunkSize); block.remove_prefix(kChunkSize); } std::memcpy(chunk, block.data(), block.size()); position = block.size(); } // The last block can be hashed without copying its last chunk if there are no // buffered data from the previous blocks. 
absl::string_view block = blocks().back(); if (block.size() <= kChunkSize - position) { if (position > 0) { std::memcpy(chunk + position, block.data(), block.size()); position += block.size(); hash_state = HashState::combine_contiguous(std::move(hash_state), chunk, position); } else if (!block.empty()) { hash_state = HashState::combine_contiguous(std::move(hash_state), block.data(), block.size()); } return HashState::combine(std::move(hash_state), size()); } if (position > 0) { const size_t remaining = kChunkSize - position; std::memcpy(chunk + position, block.data(), remaining); hash_state = HashState::combine_contiguous(std::move(hash_state), chunk, kChunkSize); block.remove_prefix(remaining); } while (block.size() > kChunkSize) { hash_state = HashState::combine_contiguous(std::move(hash_state), block.data(), kChunkSize); block.remove_prefix(kChunkSize); } RIEGELI_ASSERT(!block.empty()); hash_state = HashState::combine_contiguous(std::move(hash_state), block.data(), block.size()); return HashState::combine(std::move(hash_state), size()); } template void Chain::Stringify(Sink& dest) const { for (const absl::string_view block : blocks()) dest.Append(block); } template void Chain::Debug(DebugStream& dest) const { dest.DebugStringQuote(); for (const absl::string_view fragment : blocks()) { dest.DebugStringFragment(fragment); } dest.DebugStringQuote(); } } // namespace riegeli #endif // RIEGELI_BASE_CHAIN_DETAILS_H_ ================================================ FILE: riegeli/base/closing_ptr.h ================================================ // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_CLOSING_PTR_H_ #define RIEGELI_BASE_CLOSING_PTR_H_ #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // A deleter for `std::unique_ptr` which does nothing. struct NullDeleter { template void operator()(ABSL_ATTRIBUTE_UNUSED T* ptr) const {} }; // Marks the pointer with the intent to transfer the responsibility to close the // object when done with the pointer, even though the object is not moved nor // destroyed. // // In the context of `Dependency` and `Any`, passing `ClosingPtr(&m)` // instead of `std::move(m)` avoids moving `m`, but the caller must ensure that // the dependent object is valid while the host object needs it. template using ClosingPtrType = std::unique_ptr; template inline ClosingPtrType ClosingPtr(T* ptr) { return ClosingPtrType(ptr); } } // namespace riegeli #endif // RIEGELI_BASE_CLOSING_PTR_H_ ================================================ FILE: riegeli/base/compact_string.cc ================================================ // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. #include "riegeli/base/compact_string.h" #include #include #include #include #include #include "absl/base/optimization.h" #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/estimated_allocated_size.h" namespace riegeli { void CompactString::AssignSlow(absl::string_view src) { const size_t old_capacity = capacity(); DeleteRepr(std::exchange( repr_, MakeRepr(src, UnsignedMax(src.size(), old_capacity + old_capacity / 2)))); } void CompactString::AssignSlow(const CompactString& that) { const uintptr_t that_tag = that.repr_ & kTagMask; const size_t that_size = that.allocated_size_for_tag(that_tag); if (ABSL_PREDICT_TRUE(that_size <= capacity())) { set_size(that_size); // Use `std::memmove()` to support assigning from `*this`. std::memmove(data(), that.allocated_data(), that_size); } else { AssignSlow(absl::string_view(that.allocated_data(), that_size)); } } uintptr_t CompactString::MakeReprSlow(size_t size, size_t capacity) { RIEGELI_ASSERT_LE(size, capacity) << "Failed precondition of CompactString::MakeReprSlow(): " "size exceeds capacity"; RIEGELI_ASSERT_GT(capacity, kInlineCapacity) << "Failed precondition of CompactString::MakeReprSlow(): " "representation is inline, use MakeRepr() instead"; uintptr_t repr; if (capacity <= 0xff) { const size_t requested = UnsignedMin(EstimatedAllocatedSize(capacity + 2), size_t{0xff + 2}); repr = reinterpret_cast(Allocate(requested) + 2); set_allocated_capacity(requested - 2, repr); set_allocated_size(size, repr); } else if (capacity <= 0xffff) { const size_t requested = UnsignedMin(EstimatedAllocatedSize(capacity + 4), size_t{0xffff + 4}); repr = reinterpret_cast(Allocate(requested) + 4); set_allocated_capacity(requested - 4, repr); set_allocated_size(size, repr); } else { static_assert(sizeof(size_t) % 4 == 0, "Unsupported size_t size"); 
RIEGELI_CHECK_LE(capacity, max_size()) << "CompactString capacity overflow"; const size_t requested = EstimatedAllocatedSize(capacity + 2 * sizeof(size_t)); repr = reinterpret_cast(Allocate(requested) + 2 * sizeof(size_t)); set_allocated_capacity(requested - 2 * sizeof(size_t), repr); set_allocated_size(size, repr); } return repr; } char* CompactString::ResizeSlow(size_t new_size, size_t min_capacity, size_t used_size) { RIEGELI_ASSERT_LE(new_size, min_capacity) << "Failed precondition of CompactString::ResizeSlow(): " "size exceeds capacity"; RIEGELI_ASSERT_LE(used_size, size()) << "Failed precondition of CompactString::ResizeSlow(): " "used size exceeds old size"; RIEGELI_ASSERT_LE(used_size, new_size) << "Failed precondition of CompactString::ResizeSlow(): " "used size exceeds new size"; const size_t old_capacity = capacity(); RIEGELI_ASSERT_GT(min_capacity, kInlineCapacity) << "Inline representation has a fixed capacity, so reallocation is never " "needed when the new capacity can use inline representation"; const uintptr_t new_repr = MakeReprSlow( new_size, UnsignedMax(min_capacity, old_capacity + old_capacity / 2)); char* ptr = allocated_data(new_repr); std::memcpy(ptr, data(), used_size); ptr += used_size; DeleteRepr(std::exchange(repr_, new_repr)); return ptr; } void CompactString::ShrinkToFitSlow() { const uintptr_t tag = repr_ & kTagMask; RIEGELI_ASSERT_NE(tag, kInlineTag) << "Failed precondition of CompactString::ShrinkToFitSlow(): " "representation is inline, use shrink_to_fit() instead"; size_t size; if (tag == 2) { size = allocated_size(); if (size > kInlineCapacity && allocated_capacity() + 2 <= UnsignedMin(EstimatedAllocatedSize(size + 2), size_t{0xff + 2})) { return; } } else if (tag == 4) { size = allocated_size(); if (size > 0xff && allocated_capacity() + 4 <= UnsignedMin(EstimatedAllocatedSize(size + 4), size_t{0xffff + 4})) { return; } } else if (tag == 0) { size = allocated_size(); if (size > 0xffff && allocated_capacity() + 2 * sizeof(size_t) 
<= EstimatedAllocatedSize(size + 2 * sizeof(size_t))) { return; } } else { RIEGELI_ASSUME_UNREACHABLE() << "Impossible tag: " << tag; } DeleteRepr(std::exchange( repr_, MakeRepr(absl::string_view(allocated_data(), size)))); } char* CompactString::AppendSlow(size_t length) { const size_t old_size = size(); RIEGELI_CHECK_LE(length, max_size() - old_size) << "CompactString size overflow"; const size_t new_size = old_size + length; return ResizeSlow(new_size, new_size, old_size); } void CompactString::AppendSlow(absl::string_view src) { RIEGELI_ASSERT(!src.empty()) << "Failed precondition of CompactString::AppendSlow(): " "nothing to append"; const size_t old_size = size(); const size_t old_capacity = capacity(); RIEGELI_CHECK_LE(src.size(), max_size() - old_size) << "CompactString size overflow"; const size_t new_size = old_size + src.size(); RIEGELI_ASSERT_GT(new_size, kInlineCapacity) << "Inline representation has a fixed capacity, so reallocation is never " "needed when the new capacity can use inline representation"; const uintptr_t new_repr = MakeReprSlow( new_size, UnsignedMax(new_size, old_capacity + old_capacity / 2)); char* ptr = allocated_data(new_repr); std::memcpy(ptr, data(), old_size); ptr += old_size; // Copy from `src` before deleting `repr_` to support appending from a // substring of `*this`. 
std::memcpy(ptr, src.data(), src.size()); DeleteRepr(std::exchange(repr_, new_repr)); } void CompactString::ReserveOneMoreByteSlow() { const size_t used_size = size(); RIEGELI_ASSERT_GT(used_size + 1, kInlineCapacity) << "Inline representation has a fixed capacity, so reallocation is never " "needed when the new capacity can use inline representation"; const uintptr_t new_repr = MakeReprSlow(used_size, used_size + 1); char* const ptr = allocated_data(new_repr); std::memcpy(ptr, data(), used_size); DeleteRepr(std::exchange(repr_, new_repr)); } void CompactString::DumpStructure(absl::string_view substr, std::ostream& dest) const { dest << "[compact_string] {"; if (!substr.empty()) { if (substr.data() != data()) { dest << " space_before: " << PtrDistance(data(), substr.data()); } dest << " space_after: " << PtrDistance(substr.data() + substr.size(), data() + capacity()); } dest << " }"; } } // namespace riegeli ================================================ FILE: riegeli/base/compact_string.h ================================================ // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_COMPACT_STRING_H_ #define RIEGELI_BASE_COMPACT_STRING_H_ #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/optimization.h" #include "absl/hash/hash.h" #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/bytes_ref.h" #include "riegeli/base/compare.h" #include "riegeli/base/external_data.h" #include "riegeli/base/new_aligned.h" #include "riegeli/base/null_safe_memcpy.h" #include "riegeli/base/type_traits.h" namespace riegeli { // `CompactString` provides a subset of functionality of `std::string`, while // having less space overhead. It is useful for storing many short strings for // a long time where each string owns its memory. // // A `CompactString` object internally consists of a pointer to heap-allocated // data. The representation has 4 cases, distinguished by how the pointer is // aligned modulo 8: // * 6 - not really a pointer but short string optimization: the size is // stored in bits [3..8), the data are stored in the remaining bytes // * 2 - the size is stored before the data as `uint8_t` // * 4 - the size is stored before the data as `uint16_t` // * 0 - the size is stored before the data as `size_t` // // In the last three cases the capacity is stored before the size in the same // width as the size. // // The data are not necessarily NUL-terminated. // // Since `data()`, `size()`, `operator[]` etc. involve branches, for iteration // it is faster to store the result of conversion to `absl::string_view` and // iterate over that, or use `StringReader`, and for repeated appending it is // faster to use `CompactStringWriter`. 
// NOTE(review): this span (the `CompactString` class body plus the
// `CompactStringHash` / `CompactStringEq` functors) was mangled during text
// extraction: the original line breaks were collapsed and template argument
// lists in angle brackets were stripped — e.g. `std::numeric_limits::max()`
// is missing `<size_t>`, `WithCompare` is missing its template argument, the
// `std::enable_if_t`/`std::conjunction` clauses and `std::forward(b)` are
// missing their arguments, `reinterpret_cast(...)` casts are missing their
// target types, and `allocated_size(repr)` / `allocated_capacity(repr)` are
// missing `<uint8_t>` / `<uint16_t>` / `<size_t>` (tag 2 / 4 / 0 per the
// representation comment above). The tokens below are preserved verbatim;
// restore the missing template arguments from the upstream file before
// compiling.
// // Memory usage of a `CompactString` of capacity c, assuming 8-byte pointers, // where H(n) is memory usage of a heap-allocated block of length n: // // c | `CompactString` memory usage // ----------------|------------------------------ // 0 .. 7 | 8 // 8 .. 255 | 8 + H(c + 2) // 256 .. 65535 | 8 + H(c + 4) // 65536 .. max | 8 + H(c + 16) // // For sizes up to 255 this is less than libc++ `std::string` by about 15, and // less than libstdc++ `std::string` by about 23. class ABSL_ATTRIBUTE_TRIVIAL_ABI CompactString : public WithCompare { public: static constexpr size_t max_size() { return std::numeric_limits::max() - 2 * sizeof(size_t); } // Creates an empty `CompactString`. CompactString() = default; // Creates a `CompactString` with the given size and uninitialized data. explicit CompactString(size_t size) : repr_(MakeRepr(size)) {} // Creates a `CompactString` which holds a copy of `src`. explicit CompactString(BytesRef src) : repr_(MakeRepr(src)) {} CompactString& operator=(BytesRef src); // Creates a `CompactString` which holds a copy of `src`. Reserves one extra // char so that `c_str()` does not need reallocation. static CompactString ForCStr(BytesRef src) { return CompactString(FromReprTag(), MakeRepr(src, src.size() + 1)); } CompactString(const CompactString& that); CompactString& operator=(const CompactString& that); // The source `CompactString` is left empty. CompactString(CompactString&& that) noexcept : repr_(std::exchange(that.repr_, kInlineTag)) {} CompactString& operator=(CompactString&& that) { DeleteRepr(std::exchange(repr_, std::exchange(that.repr_, kInlineTag))); return *this; } ~CompactString() { DeleteRepr(repr_); } // Views the value as an `absl::string_view`. /*implicit*/ operator absl::string_view() const ABSL_ATTRIBUTE_LIFETIME_BOUND; bool empty() const { return size() == 0; } char* data() ABSL_ATTRIBUTE_LIFETIME_BOUND; // Never `nullptr`. const char* data() const ABSL_ATTRIBUTE_LIFETIME_BOUND; // Never `nullptr`. 
size_t size() const; size_t capacity() const; char& operator[](size_t index) ABSL_ATTRIBUTE_LIFETIME_BOUND; const char& operator[](size_t index) const ABSL_ATTRIBUTE_LIFETIME_BOUND; char& at(size_t index) ABSL_ATTRIBUTE_LIFETIME_BOUND; const char& at(size_t index) const ABSL_ATTRIBUTE_LIFETIME_BOUND; char& front() ABSL_ATTRIBUTE_LIFETIME_BOUND; const char& front() const ABSL_ATTRIBUTE_LIFETIME_BOUND; char& back() ABSL_ATTRIBUTE_LIFETIME_BOUND; const char& back() const ABSL_ATTRIBUTE_LIFETIME_BOUND; void clear() { set_size(0); } // Sets the size to `new_size` without reallocation. // // If `new_size <= size()`, the prefix of data with `new_size` is preserved. // // If `new_size >= size()`, all existing data are preserved and new data are // uninitialized. // // Precondition: `new_size <= capacity()` void set_size(size_t new_size); // Sets the size to `new_size`, reallocating if needed, ensuring that repeated // growth has the cost proportional to the final size. // // If `new_size <= size()`, the prefix of data with `new_size` is preserved. // // If `new_size >= size()`, all existing data are preserved and new data are // uninitialized. // // `resize(new_size)` is equivalent to `reserve(new_size)` followed by // `set_size(new_size)`. void resize(size_t new_size); // Sets the size to `new_size`, ensuring that repeated growth has the cost // proportional to the final size. // // The prefix of data with `used_size` is preserved. // // If `new_size > size()`, new data are uninitialized. // // Returns `data() + used_size`, for convenience of appending to previously // used data. // // `resize(new_size, used_size)` is equivalent to `set_size(used_size)` // followed by `resize(new_size)` and returning `data() + used_size`. // `resize(new_size)` is equivalent to `resize(new_size, size())`. 
// // Preconditions: // `used_size <= size()` // `used_size <= new_size` char* resize(size_t new_size, size_t used_size) ABSL_ATTRIBUTE_LIFETIME_BOUND; // Ensures that `capacity() >= min_capacity`, ensuring that repeated growth // has the cost proportional to the final size. void reserve(size_t min_capacity); void shrink_to_fit(); // Appends `length` uninitialized data. // // Returns `data() + used_size` where `used_size` is `size()` before the call, // for convenience of appending to previously used data. // // `append(length)` is equivalent to `resize(size() + length, size())` with // a check against overflow of `size() + length`. char* append(size_t length) ABSL_ATTRIBUTE_LIFETIME_BOUND; // Appends `src`. void append(absl::string_view src); // Ensures that `data()` are NUL-terminated after `size()` and returns // `data()`. // // In contrast to `std::string::c_str()`, this is a non-const operation. // It may reallocate the string and it writes the NUL each time. const char* c_str() ABSL_ATTRIBUTE_LIFETIME_BOUND; // Returns the representation of the `CompactString` as `uintptr_t`. // // Ownership is transferred to the `uintptr_t`, the `CompactString` is // left empty. The `uintptr_t` must be passed exactly once to // `CompactString::MoveFromRaw()` to recover the `CompactString` and free its // memory. // // The returned `uintptr_t` is always even and never zero. uintptr_t RawMove() && { return std::exchange(repr_, kInlineTag); } // Returns a pointer to the representation of the `CompactString` as // `uintptr_t`. // // Ownership is not transferred and the `CompactString` is unchanged. // // The returned `uintptr_t` is always even and never zero. const uintptr_t* RawView() const { return &repr_; } // Recovers a `CompactString` from the representation returned by // `CompactString::RawMove()`. // // Ownership is transferred to the `CompactString`, `raw` must not be read // again. 
// // Calling `MoveFromRaw()` and dropping its result frees the memory of the // `CompactString`. static CompactString MoveFromRaw(const uintptr_t& raw) { RIEGELI_ASSERT_NE(raw, 0u) << "Failed precondition of CompactString::MoveFromRaw(): " "representation is zero"; RIEGELI_ASSERT_EQ(raw & 1, 0u) << "Failed precondition of CompactString::MoveFromRaw(): " "representation is not even"; const uintptr_t raw_copy = raw; // The original `raw` will possibly hold a pointer which had ownership // transferred and thus might no longer be valid. Hence reading `raw` again // is most likely a bug. MarkPoisoned(reinterpret_cast(&raw), sizeof(uintptr_t)); return CompactString(FromReprTag(), raw_copy); } // Views contents of a `CompactString` from the representation returned by // `CompactString::RawMove()` or `CompactString::RawView()`. // // Ownership is not transferred and `*raw` is unchanged. static absl::string_view ViewFromRaw( const uintptr_t* raw ABSL_ATTRIBUTE_LIFETIME_BOUND) { RIEGELI_ASSERT_NE(*raw, 0u) << "Failed precondition of CompactString::ViewFromRaw(): " "representation is zero"; RIEGELI_ASSERT_EQ(*raw & 1, 0u) << "Failed precondition of CompactString::ViewFromRaw(): " "representation is not even"; const uintptr_t tag = *raw & kTagMask; if (tag == kInlineTag) { return absl::string_view(inline_data(raw), inline_size(*raw)); } return absl::string_view(allocated_data(*raw), allocated_size_for_tag(tag, *raw)); } // Returns the representation of a copy of the `CompactString` viewed from // the representation returned by `CompactString::RawMove()`. // // Equivalent to `RawMove(CompactString(ViewFromRaw(&raw)))`. 
static uintptr_t CopyRaw(uintptr_t raw) { RIEGELI_ASSERT_NE(raw, 0u) << "Failed precondition of CompactString::CopyRaw(): " "representation is zero"; RIEGELI_ASSERT_EQ(raw & 1, 0u) << "Failed precondition of CompactString::CopyRaw(): " "representation is not even"; const uintptr_t tag = raw & kTagMask; if (tag == kInlineTag) return raw; return MakeRepr(absl::string_view(allocated_data(raw), allocated_size_for_tag(tag, raw))); } static const char* CStrFromRaw(uintptr_t* raw); friend bool operator==(const CompactString& a, const CompactString& b) { return a.repr_ == b.repr_ || absl::string_view(a) == absl::string_view(b); } friend StrongOrdering RIEGELI_COMPARE(const CompactString& a, const CompactString& b) { if (a.repr_ == b.repr_) return StrongOrdering::equal; return riegeli::Compare(absl::string_view(a), absl::string_view(b)); } template < typename T, std::enable_if_t, std::is_convertible>, int> = 0> friend bool operator==(const CompactString& a, T&& b) { return absl::string_view(a) == BytesRef(std::forward(b)); } template < typename T, std::enable_if_t, std::is_convertible>, int> = 0> friend StrongOrdering RIEGELI_COMPARE(const CompactString& a, T&& b) { return riegeli::Compare(absl::string_view(a), BytesRef(std::forward(b))); } template friend HashState AbslHashValue(HashState hash_state, const CompactString& self) { return HashState::combine(std::move(hash_state), absl::string_view(self)); } // Default stringification by `absl::StrCat()` etc. template friend void AbslStringify(Sink& dest, const CompactString& src) { dest.Append(absl::string_view(src)); } friend std::ostream& operator<<(std::ostream& dest, const CompactString& src) { return dest << absl::string_view(src); } // Supports `absl::Format(&compact_string, format, args...)`. 
friend void AbslFormatFlush(CompactString* dest, absl::string_view src) { dest->append(src); } // Indicates support for: // * `ExternalRef(CompactString&&)` // * `ExternalRef(CompactString&&, substr)` friend void RiegeliSupportsExternalRef(CompactString*) {} // Supports `ExternalRef`. friend bool RiegeliExternalCopy(const CompactString* self) { return (self->repr_ & kTagMask) == kInlineTag; } // Supports `ExternalRef`. friend ExternalStorage RiegeliToExternalStorage(CompactString* self) { return ExternalStorage( reinterpret_cast(std::exchange(self->repr_, kInlineTag)), [](void* ptr) { const uintptr_t repr = reinterpret_cast(ptr); RIEGELI_ASSUME_NE(repr & kTagMask, kInlineTag) << "Failed precondition of " "RiegeliToExternalStorage(CompactString*): " "case excluded by RiegeliExternalCopy()"; DeleteRepr(repr); }); } // Supports `ExternalRef` and `Chain::Block`. friend void RiegeliDumpStructure(const CompactString* self, absl::string_view substr, std::ostream& dest) { self->DumpStructure(substr, dest); } // Supports `MemoryEstimator`. 
template friend void RiegeliRegisterSubobjects(const CompactString* self, MemoryEstimator& memory_estimator) { self->RegisterSubobjects(memory_estimator); } private: struct FromReprTag { explicit FromReprTag() = default; }; explicit CompactString(FromReprTag, uintptr_t raw) : repr_(raw) {} static constexpr size_t kTagBits = 3; static constexpr uintptr_t kTagMask = (1u << kTagBits) - 1; static constexpr uintptr_t kInlineTag = 6; static constexpr size_t kInlineCapacity = UnsignedMin(sizeof(uintptr_t) - 1, size_t{0xff >> kTagBits}); #if ABSL_IS_LITTLE_ENDIAN static constexpr size_t kInlineDataOffset = 1; #elif ABSL_IS_BIG_ENDIAN static constexpr size_t kInlineDataOffset = 0; #else #error Unknown endianness #endif char* inline_data() { return inline_data(&repr_); } const char* inline_data() const { return inline_data(&repr_); } static char* inline_data(uintptr_t* repr) { RIEGELI_ASSERT_EQ(*repr & kTagMask, kInlineTag) << "Failed precondition of CompactString::inline_data(): " "representation not inline"; return reinterpret_cast(repr) + kInlineDataOffset; } static const char* inline_data(const uintptr_t* repr) { RIEGELI_ASSERT_EQ(*repr & kTagMask, kInlineTag) << "Failed precondition of CompactString::inline_data(): " "representation not inline"; return reinterpret_cast(repr) + kInlineDataOffset; } size_t inline_size() const { return inline_size(repr_); } static size_t inline_size(uintptr_t repr) { RIEGELI_ASSERT_EQ(repr & kTagMask, kInlineTag) << "Failed precondition of CompactString::inline_size(): " "representation not inline"; const size_t size = IntCast((repr & 0xff) >> kTagBits); // This assumption helps the compiler to reason about comparisons with // `size()`. 
RIEGELI_ASSUME_LE(size, kInlineCapacity) << "Failed invariant of CompactString: " "inline size never exceeds kInlineCapacity"; return size; } char* allocated_data() const { return allocated_data(repr_); } static char* allocated_data(uintptr_t repr) { RIEGELI_ASSERT_NE(repr & kTagMask, kInlineTag) << "Failed precondition of CompactString::allocated_data(): " "representation not allocated"; return reinterpret_cast(repr); } size_t allocated_size_for_tag(uintptr_t tag) const { return allocated_size_for_tag(tag, repr_); } static size_t allocated_size_for_tag(uintptr_t tag, uintptr_t repr) { if (tag == 2) return allocated_size(repr); if (tag == 4) return allocated_size(repr); if (tag == 0) return allocated_size(repr); RIEGELI_ASSUME_UNREACHABLE() << "Impossible tag: " << tag; } template size_t allocated_size() const { return allocated_size(repr_); } template static size_t allocated_size(uintptr_t repr) { const uintptr_t tag = repr & kTagMask; RIEGELI_ASSERT_EQ(tag == 0 ? 2 * sizeof(size_t) : tag, 2 * sizeof(T)) << "Failed precondition of CompactString::allocated_size(): " "tag does not match size representation"; T stored_size; std::memcpy(&stored_size, allocated_data(repr) - sizeof(T), sizeof(T)); return size_t{stored_size}; } void set_inline_size(size_t size) { set_inline_size(size, repr_); } static void set_inline_size(size_t size, uintptr_t& repr) { RIEGELI_ASSERT_EQ(repr & kTagMask, kInlineTag) << "Failed precondition of CompactString::set_inline_size(): " "representation not inline"; repr = (repr & ~(0xff & ~kTagMask)) | (size << kTagBits); } template void set_allocated_size(size_t size) { set_allocated_size(size, repr_); } template static void set_allocated_size(size_t size, uintptr_t repr) { const uintptr_t tag = repr & kTagMask; RIEGELI_ASSERT_EQ(tag == 0 ? 
2 * sizeof(size_t) : tag, 2 * sizeof(T)) << "Failed precondition of CompactString::set_allocated_size(): " "tag does not match size representation"; const T stored_size = IntCast(size); std::memcpy(allocated_data(repr) - sizeof(T), &stored_size, sizeof(T)); } void set_allocated_size_for_tag(uintptr_t tag, size_t new_size); size_t allocated_capacity_for_tag(uintptr_t tag) const { return allocated_capacity_for_tag(tag, repr_); } static size_t allocated_capacity_for_tag(uintptr_t tag, uintptr_t repr) { if (tag == 2) return allocated_capacity(repr); if (tag == 4) return allocated_capacity(repr); if (tag == 0) return allocated_capacity(repr); RIEGELI_ASSUME_UNREACHABLE() << "Impossible tag: " << tag; } template size_t allocated_capacity() const { return allocated_capacity(repr_); } template static size_t allocated_capacity(uintptr_t repr) { const uintptr_t tag = repr & kTagMask; RIEGELI_ASSERT_EQ(tag == 0 ? 2 * sizeof(size_t) : tag, 2 * sizeof(T)) << "Failed precondition of CompactString::allocated_capacity(): " "tag does not match capacity representation"; T stored_capacity; std::memcpy(&stored_capacity, allocated_data(repr) - 2 * sizeof(T), sizeof(T)); // This assumption helps the compiler to reason about comparisons with // `capacity()`. RIEGELI_ASSUME_GT(stored_capacity, kInlineCapacity) << "Failed invariant of CompactString: " "allocated capacity always exceeds kInlineCapacity"; return size_t{stored_capacity}; } template static void set_allocated_capacity(size_t capacity, uintptr_t repr) { const uintptr_t tag = repr & kTagMask; RIEGELI_ASSERT_EQ(tag == 0 ? 
2 * sizeof(size_t) : tag, 2 * sizeof(T)) << "Failed precondition of CompactString::set_allocated_capacity(): " "tag does not match capacity representation"; const T stored_capacity = IntCast(capacity); std::memcpy(allocated_data(repr) - 2 * sizeof(T), &stored_capacity, sizeof(T)); } static char* Allocate(size_t size) { return static_cast(NewAligned(size)); } static void Free(char* ptr, size_t size) { DeleteAligned(ptr, size); } static uintptr_t MakeRepr(size_t size, size_t capacity); static uintptr_t MakeReprSlow(size_t size, size_t capacity); static uintptr_t MakeRepr(size_t size); static uintptr_t MakeRepr(absl::string_view src, size_t capacity); static uintptr_t MakeRepr(absl::string_view src); static void DeleteRepr(uintptr_t repr); void AssignSlow(absl::string_view src); void AssignSlow(const CompactString& that); char* ResizeSlow(size_t new_size, size_t min_capacity, size_t used_size); void ShrinkToFitSlow(); char* AppendSlow(size_t length); void AppendSlow(absl::string_view src); void ReserveOneMoreByteSlow(); void DumpStructure(absl::string_view substr, std::ostream& dest) const; template void RegisterSubobjects(MemoryEstimator& memory_estimator) const; uintptr_t repr_ = kInlineTag; }; // Hash and equality which support heterogeneous lookup. struct CompactStringHash { using is_transparent = void; size_t operator()(const CompactString& value) const { return absl::Hash()(value); } size_t operator()(absl::string_view value) const { return absl::Hash()(value); } }; struct CompactStringEq { using is_transparent = void; bool operator()(const CompactString& a, const CompactString& b) const { return a == b; } bool operator()(const CompactString& a, absl::string_view b) const { return a == b; } bool operator()(absl::string_view a, const CompactString& b) const { return a == b; } bool operator()(absl::string_view a, absl::string_view b) const { return a == b; } }; // Implementation details follow. 
inline uintptr_t CompactString::MakeRepr(size_t size, size_t capacity) { RIEGELI_ASSERT_LE(size, capacity) << "Failed precondition of CompactString::MakeRepr(): " "size greater than capacity"; if (capacity <= kInlineCapacity) { return uintptr_t{(size << kTagBits) + kInlineTag}; } return MakeReprSlow(size, capacity); } inline uintptr_t CompactString::MakeRepr(size_t size) { return MakeRepr(size, size); } inline uintptr_t CompactString::MakeRepr(absl::string_view src, size_t capacity) { uintptr_t repr = MakeRepr(src.size(), capacity); riegeli::null_safe_memcpy( capacity <= kInlineCapacity ? inline_data(&repr) : allocated_data(repr), src.data(), src.size()); return repr; } inline uintptr_t CompactString::MakeRepr(absl::string_view src) { return MakeRepr(src, src.size()); } inline void CompactString::DeleteRepr(uintptr_t repr) { const uintptr_t tag = repr & kTagMask; if (tag == kInlineTag) return; const size_t offset = tag == 0 ? 2 * sizeof(size_t) : IntCast(tag); Free(allocated_data(repr) - offset, allocated_capacity_for_tag(tag, repr) + offset); } inline CompactString& CompactString::operator=(BytesRef src) { if (ABSL_PREDICT_TRUE(src.size() <= capacity())) { set_size(src.size()); // Use `memmove()` to support assigning from a substring of `*this`. 
riegeli::null_safe_memmove(data(), src.data(), src.size()); } else { AssignSlow(src); } return *this; } inline CompactString::CompactString(const CompactString& that) { const uintptr_t that_tag = that.repr_ & kTagMask; if (that_tag == kInlineTag) { repr_ = that.repr_; } else { repr_ = MakeRepr(absl::string_view(that.allocated_data(), that.allocated_size_for_tag(that_tag))); } } inline CompactString& CompactString::operator=(const CompactString& that) { const uintptr_t that_tag = that.repr_ & kTagMask; if (that_tag == kInlineTag) { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) { repr_ = that.repr_; } else { set_allocated_size_for_tag(tag, that.inline_size()); RIEGELI_ASSERT_LE(kInlineCapacity, capacity()) << "Failed invariant of CompactString: " "inline capacity always fits in a capacity"; // Copy fixed `kInlineCapacity` instead of variable `that.inline_size()`. std::memcpy(allocated_data(), that.inline_data(), kInlineCapacity); // The `#ifdef` helps the compiler to realize that computing the arguments // is unnecessary if `MarkPoisoned()` does nothing. #ifdef MEMORY_SANITIZER // This part got unpoisoned by copying `kInlineCapacity` instead of // `that.inline_size()`. Poison it again. 
MarkPoisoned(allocated_data() + that.inline_size(), kInlineCapacity - that.inline_size()); #endif } } else { AssignSlow(that); } return *this; } inline char* CompactString::data() ABSL_ATTRIBUTE_LIFETIME_BOUND { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) return inline_data(); return allocated_data(); } inline const char* CompactString::data() const ABSL_ATTRIBUTE_LIFETIME_BOUND { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) return inline_data(); return allocated_data(); } inline size_t CompactString::size() const { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) return inline_size(); return allocated_size_for_tag(tag); } inline size_t CompactString::capacity() const { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) return kInlineCapacity; return allocated_capacity_for_tag(tag); } inline CompactString::operator absl::string_view() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return ViewFromRaw(&repr_); } inline char& CompactString::operator[](size_t index) ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT_LT(index, size()) << "Failed precondition of CompactString::operator[]: index out of range"; return data()[index]; } inline const char& CompactString::operator[](size_t index) const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT_LT(index, size()) << "Failed precondition of CompactString::operator[]: index out of range"; return data()[index]; } inline char& CompactString::at(size_t index) ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_CHECK_LT(index, size()) << "Failed precondition of CompactString::at(): index out of range"; return data()[index]; } inline const char& CompactString::at(size_t index) const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_CHECK_LT(index, size()) << "Failed precondition of CompactString::at(): index out of range"; return data()[index]; } inline char& CompactString::front() ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(!empty()) << "Failed precondition of CompactString::front(): empty string"; 
return data()[0]; } inline const char& CompactString::front() const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(!empty()) << "Failed precondition of CompactString::front(): empty string"; return data()[0]; } inline char& CompactString::back() ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(!empty()) << "Failed precondition of CompactString::back(): empty string"; return data()[size() - 1]; } inline const char& CompactString::back() const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(!empty()) << "Failed precondition of CompactString::back(): empty string"; return data()[size() - 1]; } inline void CompactString::set_size(size_t new_size) { RIEGELI_ASSERT_LE(new_size, capacity()) << "Failed precondition of CompactString::SetSize(): size out of range"; const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) { set_inline_size(new_size); return; } set_allocated_size_for_tag(tag, new_size); } inline void CompactString::set_allocated_size_for_tag(uintptr_t tag, size_t new_size) { // The `#ifdef` helps the compiler to realize that computing the arguments is // unnecessary if `MarkPoisoned()` does nothing. 
#ifdef MEMORY_SANITIZER if (new_size < allocated_size_for_tag(tag)) { MarkPoisoned(allocated_data() + new_size, allocated_size_for_tag(tag) - new_size); } #endif if (tag == 2) { set_allocated_size(new_size); } else if (tag == 4) { set_allocated_size(new_size); } else if (tag == 0) { set_allocated_size(new_size); } else { RIEGELI_ASSUME_UNREACHABLE() << "Impossible tag: " << tag; } } inline void CompactString::resize(size_t new_size) { if (ABSL_PREDICT_TRUE(new_size <= capacity())) { set_size(new_size); return; } ResizeSlow(new_size, new_size, size()); } inline char* CompactString::resize(size_t new_size, size_t used_size) ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT_LE(used_size, size()) << "Failed precondition of CompactString::resize(): " "used size exceeds old size"; RIEGELI_ASSERT_LE(used_size, new_size) << "Failed precondition of CompactString::resize(): " "used size exceeds new size"; if (ABSL_PREDICT_TRUE(new_size <= capacity())) { // The `#ifdef` helps the compiler to realize that computing the arguments // is unnecessary if `MarkPoisoned()` does nothing. 
#ifdef MEMORY_SANITIZER const uintptr_t tag = repr_ & kTagMask; if (tag != kInlineTag) { MarkPoisoned( allocated_data() + used_size, UnsignedMin(allocated_size_for_tag(tag), new_size) - used_size); } #endif set_size(new_size); return data() + used_size; } return ResizeSlow(new_size, new_size, used_size); } inline void CompactString::reserve(size_t min_capacity) { if (ABSL_PREDICT_TRUE(min_capacity <= capacity())) return; const size_t used_size = size(); ResizeSlow(used_size, min_capacity, used_size); } inline void CompactString::shrink_to_fit() { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) return; ShrinkToFitSlow(); } inline char* CompactString::append(size_t length) ABSL_ATTRIBUTE_LIFETIME_BOUND { const size_t old_size = size(); const size_t old_capacity = capacity(); if (ABSL_PREDICT_TRUE(length <= old_capacity - old_size)) { set_size(old_size + length); return data() + old_size; } return AppendSlow(length); } inline void CompactString::append(absl::string_view src) { const size_t old_size = size(); const size_t old_capacity = capacity(); if (ABSL_PREDICT_TRUE(src.size() <= old_capacity - old_size)) { set_size(old_size + src.size()); riegeli::null_safe_memcpy(data() + old_size, src.data(), src.size()); return; } AppendSlow(src); } inline const char* CompactString::c_str() ABSL_ATTRIBUTE_LIFETIME_BOUND { const size_t used_size = size(); // Allocate just enough for NUL, do not call `reserve(used_size + 1)` here // because that could overallocate by 50%. In `c_str()` it is likely that the // string already has its final value. 
if (ABSL_PREDICT_FALSE(used_size == capacity())) ReserveOneMoreByteSlow(); char* const ptr = data(); ptr[used_size] = '\0'; return ptr; } inline const char* CompactString::CStrFromRaw(uintptr_t* raw) { RIEGELI_ASSERT_NE(*raw, 0u) << "Failed precondition of CompactString::CStrFromRaw(): " "representation is zero"; RIEGELI_ASSERT_EQ(*raw & 1, 0u) << "Failed precondition of CompactString::CStrFromRaw(): " "representation is not even"; uintptr_t tag = *raw & kTagMask; char* ptr; size_t used_size; size_t capacity; if (tag == kInlineTag) { ptr = inline_data(raw); used_size = inline_size(*raw); capacity = kInlineCapacity; } else { ptr = allocated_data(*raw); used_size = allocated_size_for_tag(tag, *raw); capacity = allocated_capacity_for_tag(tag, *raw); } if (ABSL_PREDICT_FALSE(used_size == capacity)) { CompactString str = CompactString::MoveFromRaw(*raw); str.ReserveOneMoreByteSlow(); *raw = std::move(str).RawMove(); tag = *raw & kTagMask; ptr = allocated_data(*raw); used_size = allocated_size_for_tag(tag, *raw); } ptr[used_size] = '\0'; return ptr; } template inline void CompactString::RegisterSubobjects( MemoryEstimator& memory_estimator) const { const uintptr_t tag = repr_ & kTagMask; if (tag == kInlineTag) return; const size_t offset = tag == 0 ? 2 * sizeof(size_t) : IntCast(tag); memory_estimator.RegisterDynamicMemory( allocated_data() - offset, offset + allocated_capacity_for_tag(tag)); } } // namespace riegeli #endif // RIEGELI_BASE_COMPACT_STRING_H_ ================================================ FILE: riegeli/base/compare.h ================================================ // Copyright 2023 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// NOTE(review): this span of `compare.h` was collapsed onto a few physical
// lines by text extraction and template argument lists in angle brackets were
// stripped — e.g. `template struct IsOrdering : std::false_type {};` and the
// partial specializations below are missing their `<typename T, ...>` heads
// and `std::void_t<...>` / `std::enable_if_t<...>` arguments, the empty
// `#include` directives lost their header names, and the final definition
// visible here runs past the end of this chunk. Tokens below are preserved
// verbatim; restore the missing template arguments from the upstream file
// before compiling.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_COMPARE_H_ #define RIEGELI_BASE_COMPARE_H_ #include #include "absl/base/nullability.h" #include "absl/strings/string_view.h" // IWYU pragma: keep #if !__cpp_impl_three_way_comparison #include "absl/types/compare.h" #endif ABSL_POINTERS_DEFAULT_NONNULL // Emulate C++20 `operator<=>` machinery for earlier C++ versions. namespace riegeli { // `PartialOrdering` is `std::partial_ordering` in C++20 or // `absl::partial_ordering` in earlier C++ versions. #if __cpp_impl_three_way_comparison using PartialOrdering = decltype(0.0 <=> 0.0); #else using PartialOrdering = absl::partial_ordering; #endif // `WeakOrdering` is not provided because it cannot be implemented without // conditionally including ``. // `StrongOrdering` is `std::strong_ordering` in C++20 or // `absl::strong_ordering` in earlier C++ versions. #if __cpp_impl_three_way_comparison using StrongOrdering = decltype(0 <=> 0); #else using StrongOrdering = absl::strong_ordering; #endif // Define `friend auto RIEGELI_COMPARE` instead of C++20 // `friend auto operator<=>`. // // It should return `PartialOrdering` or `StrongOrdering`. // // It is meant to be called by `riegeli::Compare(a, b)`, not directly as // `RIEGELI_COMPARE(a, b)`. #if __cpp_impl_three_way_comparison #define RIEGELI_COMPARE operator<=> #else #define RIEGELI_COMPARE RiegeliCompare #endif // `IsOrdering::value` is `true` if values of type `T` can be assumed to // indicate an ordering: they are comparable with literal 0. 
// // This includes `{std,absl}::{partial,weak,strong}_ordering`, and `int` being // the result of `std::memcmp()` or `absl::string_view::compare()`. template struct IsOrdering : std::false_type {}; template struct IsOrdering() < 0), decltype(std::declval() > 0), decltype(std::declval() == 0)>> : std::true_type {}; // `IsTotalOrdering::value` is `true` if values of type `T` can be assumed to // indicate a total ordering: they are comparable with literal 0, and // `T::unordered` is not defined. // // This includes `{std,absl}::{weak,strong}_ordering`, and `int` being the // result of `std::memcmp()` or `absl::string_view::compare()`. template struct IsTotalOrdering : IsOrdering {}; template struct IsTotalOrdering> : std::false_type {}; namespace compare_internal { template struct IsTotalOrderingWithEqual : std::false_type {}; template struct IsTotalOrderingWithEqual> : IsTotalOrdering {}; } // namespace compare_internal // `IsStrongOrdering::value` is `true` if values of type `T` can be assumed // to indicate a strong ordering: they are comparable with literal 0, // `T::unordered` is not defined, and either `T::equivalent` is not defined or // `T::equal` is defined too. // // This includes `{std,absl}::strong_ordering`, and `int` being the result of // `std::memcmp()` or `absl::string_view::compare()`. template struct IsStrongOrdering : IsTotalOrdering {}; template struct IsStrongOrdering> : compare_internal::IsTotalOrderingWithEqual {}; // Converts a value indicating an ordering to `PartialOrdering`. template , std::is_convertible>, int> = 0> inline PartialOrdering AsPartialOrdering(T ordering) { return ordering; } template , std::negation>>, int> = 0> inline PartialOrdering AsPartialOrdering(T ordering) { return ordering < 0 ? PartialOrdering::less : ordering > 0 ? PartialOrdering::greater : ordering == 0 ? PartialOrdering::equivalent : PartialOrdering::unordered; } // Converts a value indicating a strong ordering to `StrongOrdering`. 
template < typename T, std::enable_if_t, std::is_convertible>, int> = 0> inline StrongOrdering AsStrongOrdering(T ordering) { return ordering; } template < typename T, std::enable_if_t, std::negation>>, int> = 0> inline StrongOrdering AsStrongOrdering(T ordering) { return ordering < 0 ? StrongOrdering::less : ordering > 0 ? StrongOrdering::greater : StrongOrdering::equal; } #if !__cpp_impl_three_way_comparison // Definitions of `RIEGELI_COMPARE` which in C++20 are provided automatically. template < typename A, typename B, std::enable_if_t< std::conjunction_v, std::is_integral>, int> = 0> inline StrongOrdering RIEGELI_COMPARE(A a, B b) { return a < b ? StrongOrdering::less : a > b ? StrongOrdering::greater : StrongOrdering::equal; } template , std::is_integral>>, std::is_arithmetic, std::is_arithmetic>, int> = 0> inline PartialOrdering RIEGELI_COMPARE(A a, B b) { static_assert(std::is_floating_point_v || std::is_floating_point_v, "Arithmetic types which are not integral types " "must be floating point types"); return a < b ? PartialOrdering::less : a > b ? PartialOrdering::greater : a == b ? PartialOrdering::equivalent : PartialOrdering::unordered; } template , int> = 0> inline StrongOrdering RIEGELI_COMPARE(T a, T b) { return a < b ? StrongOrdering::less : a > b ? StrongOrdering::greater : StrongOrdering::equal; } template inline StrongOrdering RIEGELI_COMPARE(T* a, T* b) { return a < b ? StrongOrdering::less : a > b ? 
StrongOrdering::greater : StrongOrdering::equal; } inline StrongOrdering RIEGELI_COMPARE(absl::string_view a, absl::string_view b) { return AsStrongOrdering(a.compare(b)); } #endif namespace compare_internal { #if !__cpp_impl_three_way_comparison template struct HasEqual : std::false_type {}; template struct HasEqual< A, B, std::void_t() == std::declval())>> : std::true_type {}; #endif template struct HasCompare : std::false_type {}; template struct HasCompare() <=> std::declval() #else RIEGELI_COMPARE(std::declval(), std::declval()) #endif )>> : std::true_type { }; template struct IsDedicatedOrdering : std::false_type {}; template struct IsDedicatedOrdering> : std::true_type {}; template struct HasCompareWithLiteral0 : std::false_type {}; template struct HasCompareWithLiteral0 std::declval() #else RIEGELI_COMPARE(0, std::declval()) #endif )>> : std::true_type { }; } // namespace compare_internal // Call `riegeli::Compare(a, b)` instead of C++20 `a <=> b`. template ::value, int> = 0> inline auto Compare(const A& a, const B& b) { #if __cpp_impl_three_way_comparison return a <=> b; #else return RIEGELI_COMPARE(a, b); #endif } // Call `NegateOrdering(ordering)` instead of C++20 `0 <=> ordering`. // // `riegeli::Compare(0, ordering)` does not work because it does not properly // forward to `<=>` the property that the argument is a literal 0. 
template < typename Ordering, std::enable_if_t< std::conjunction_v, compare_internal::HasCompareWithLiteral0>, int> = 0> inline Ordering NegateOrdering(Ordering ordering) { #if __cpp_impl_three_way_comparison return 0 <=> ordering; #else return RIEGELI_COMPARE(0, ordering); #endif } template < typename Ordering, std::enable_if_t< std::conjunction_v< compare_internal::IsDedicatedOrdering, std::negation>>, int> = 0> inline Ordering NegateOrdering(Ordering ordering) { if (0 < ordering) return Ordering::less; if (0 > ordering) return Ordering::greater; return ordering; } // For types which support equality, derive `T` from `WithEqual`, and define // `friend bool operator==` with the first parameter of type `const T&` or `T`, // and the second parameter of the same type, or possibly also of other types. // // `WithEqual` provides `!=`. For heterogeneous equality it provides `==` and // `!=` with swapped parameters. // // In C++20 this is automatic. template class WithEqual { public: #if !__cpp_impl_three_way_comparison template < typename Other, std::enable_if_t::value, int> = 0> friend bool operator!=(const T& a, const Other& b) { return !(a == b); } template < typename Other, std::enable_if_t>, compare_internal::HasEqual>, int> = 0> friend bool operator==(const Other& a, const T& b) { return b == a; } template < typename Other, std::enable_if_t>, compare_internal::HasEqual>, int> = 0> friend bool operator!=(const Other& a, const T& b) { return !(b == a); } #endif }; // For types which support comparison, derive `T` from `WithCompare`. and // define `friend bool operator==` and `friend auto RIEGELI_COMPARE` with the // first parameter of type `const T&` or `T`, and the second parameter of the // same type, or possibly also of other types. // // `WithCompare` provides `!=`, `<`, `>`, `<=`, and `>=`. For heterogeneous // comparison it provides `==`, `!=`, `RIEGELI_COMPARE, `<`, `>`, `<=`, and `>=` // with swapped parameters. // // In C++20 this is automatic. 
template class WithCompare : public WithEqual { public: #if !__cpp_impl_three_way_comparison template < typename Other, std::enable_if_t::value, int> = 0> friend bool operator<(const T& a, const Other& b) { return RIEGELI_COMPARE(a, b) < 0; } template < typename Other, std::enable_if_t::value, int> = 0> friend bool operator>(const T& a, const Other& b) { return RIEGELI_COMPARE(a, b) > 0; } template < typename Other, std::enable_if_t::value, int> = 0> friend bool operator<=(const T& a, const Other& b) { return RIEGELI_COMPARE(a, b) <= 0; } template < typename Other, std::enable_if_t::value, int> = 0> friend bool operator>=(const T& a, const Other& b) { return RIEGELI_COMPARE(a, b) >= 0; } template >, compare_internal::HasCompare>, int> = 0> friend auto RIEGELI_COMPARE(const Other& a, const T& b) { return NegateOrdering(RIEGELI_COMPARE(b, a)); } template >, compare_internal::HasCompare>, int> = 0> friend bool operator<(const Other& a, const T& b) { return 0 < RIEGELI_COMPARE(b, a); } template >, compare_internal::HasCompare>, int> = 0> friend bool operator>(const Other& a, const T& b) { return 0 > RIEGELI_COMPARE(b, a); } template >, compare_internal::HasCompare>, int> = 0> friend bool operator<=(const Other& a, const T& b) { return 0 <= RIEGELI_COMPARE(b, a); } template >, compare_internal::HasCompare>, int> = 0> friend bool operator>=(const Other& a, const T& b) { return 0 >= RIEGELI_COMPARE(b, a); } #endif }; } // namespace riegeli #endif // RIEGELI_BASE_COMPARE_H_ ================================================ FILE: riegeli/base/constexpr.h ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_CONSTEXPR_H_ #define RIEGELI_BASE_CONSTEXPR_H_ #include "absl/base/nullability.h" #include "riegeli/base/port.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // Returns `true` if the value of the expression is known at compile time. #if RIEGELI_INTERNAL_HAS_BUILTIN(__builtin_constant_p) || \ RIEGELI_INTERNAL_IS_GCC_VERSION(3, 1) #define RIEGELI_IS_CONSTANT(expr) __builtin_constant_p(expr) #else #define RIEGELI_IS_CONSTANT(expr) false #endif } // namespace riegeli #endif // RIEGELI_BASE_CONSTEXPR_H_ ================================================ FILE: riegeli/base/cord_iterator_span.cc ================================================ // Copyright 2025 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "riegeli/base/cord_iterator_span.h" #include #include #include #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "absl/strings/cord.h" #include "absl/strings/resize_and_overwrite.h" #include "absl/strings/string_view.h" #include "riegeli/base/assert.h" #include "riegeli/base/string_utils.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { void CordIteratorSpan::ReadSlow(absl::Cord::CharIterator& src, size_t length, char* dest) { absl::string_view chunk = absl::Cord::ChunkRemaining(src); RIEGELI_ASSERT_LT(chunk.size(), length) << "Failed precondition of CordIteratorSpan::ReadSlow(): " "enough data available, use Read() instead"; do { std::memcpy(dest, chunk.data(), chunk.size()); absl::Cord::Advance(&src, chunk.size()); dest += chunk.size(); length -= chunk.size(); chunk = absl::Cord::ChunkRemaining(src); } while (chunk.size() < length); std::memcpy(dest, chunk.data(), length); absl::Cord::Advance(&src, length); } absl::string_view CordIteratorSpan::ToStringView(std::string& scratch) && { absl::Cord::CharIterator& iter = *iterator_; size_t length = length_; if (length == 0) return absl::string_view(); absl::string_view chunk = absl::Cord::ChunkRemaining(iter); if (ABSL_PREDICT_TRUE(chunk.size() >= length)) { absl::Cord::Advance(&iter, length); return chunk.substr(0, length); } scratch.clear(); riegeli::StringResizeAndOverwriteAmortized(scratch, length, [&](char* data, size_t size) { ReadSlow(iter, size, data); return size; }); return scratch; } void CordIteratorSpan::ToString(std::string& dest) && { absl::Cord::CharIterator& iter = *iterator_; size_t length = length_; dest.clear(); absl::StringResizeAndOverwrite(dest, length, [&](char* data, size_t size) { Read(iter, size, data); return size; }); } } // namespace riegeli ================================================ FILE: riegeli/base/cord_iterator_span.h ================================================ // Copyright 2025 Google LLC // // Licensed under the Apache License, 
Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_CORD_ITERATOR_SPAN_H_ #define RIEGELI_BASE_CORD_ITERATOR_SPAN_H_ #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "absl/strings/cord.h" #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/dependency.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `CordIteratorSpan` specifies a span of `absl::Cord::CharIterator` contents // from the current position with the given length. // // This can express the span as a single object, which is sometimes convenient. class CordIteratorSpan { public: // Returns the number of bytes from `src` to the end of the `absl::Cord`. static size_t Remaining(const absl::Cord::CharIterator& src) { return IntCast( absl::Cord::Distance(src, absl::Cord::CharIterator())); } // Copies `length` bytes from `src` to `dest[]`. static void Read(absl::Cord::CharIterator& src, size_t length, char* absl_nullable dest); // Specifies the span from the current position of `*src` with `length`. 
explicit CordIteratorSpan(absl::Cord::CharIterator* src ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t length) : iterator_(src), length_(length) { RIEGELI_ASSERT_LE(length, Remaining(*iterator_)) << "Failed precondition of CordIteratorSpan: not enough remaining data"; } CordIteratorSpan(CordIteratorSpan&& that) = default; CordIteratorSpan& operator=(CordIteratorSpan&& that) = default; absl::Cord::CharIterator& iterator() const { return *iterator_; } size_t length() const { return length_; } // Destructively reads the contents of the span to an `absl::Cord`. // // An implicit conversion allows to use a `CordIteratorSpan` when an // `absl::Cord` is expected. Some functions treat a parameter of type // `CordIteratorSpan` specially to enable a more efficient implementation. /*implicit*/ operator absl::Cord() && { return std::move(*this).ToCord(); } // Destructively reads the contents of the span to an `absl::Cord`. absl::Cord ToCord() && { return absl::Cord::AdvanceAndRead(iterator_, length_); } // Destructively reads the contents of the span to an `absl::string_view`. // // May use `scratch` for storage for the result. absl::string_view ToStringView(std::string& scratch) &&; // Destructively reads the contents of the span to an existing `std::string`. void ToString(std::string& dest) &&; // Returns the contents of the span as an `absl::string_view` if it is flat. // Otherwise returns `std::nullopt`. std::optional TryFlat() const; private: static void ReadSlow(absl::Cord::CharIterator& src, size_t length, char* dest); absl::Cord::CharIterator* iterator_; size_t length_; }; // Specialization of `DependencyImpl`. // // This allows to pass a `CordIteratorSpan` as a parameter of `CordReader`. 
template <> class DependencyImpl { public: explicit DependencyImpl(CordIteratorSpan span) : span_(std::move(span)), cord_(std::move(span_)) {} CordIteratorSpan& manager() ABSL_ATTRIBUTE_LIFETIME_BOUND { return span_; } const CordIteratorSpan& manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return span_; } const absl::Cord* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return &cord_; } bool IsOwning() const { return false; } static constexpr bool kIsStable = false; protected: DependencyImpl(DependencyImpl&& that) = default; DependencyImpl& operator=(DependencyImpl&& that) = default; ~DependencyImpl() = default; private: CordIteratorSpan span_; const absl::Cord cord_; }; // Implementation details follow. inline void CordIteratorSpan::Read(absl::Cord::CharIterator& src, size_t length, char* absl_nullable dest) { RIEGELI_ASSERT_LE(length, Remaining(src)) << "Failed precondition of CordIteratorSpan::Read(): " "not enough remaining data"; if (length == 0) return; RIEGELI_ASSERT(dest != nullptr) << "Failed precondition of CordIteratorSpan::Read(): " "non-empty span from nullptr"; const absl::string_view chunk = absl::Cord::ChunkRemaining(src); if (ABSL_PREDICT_FALSE(chunk.size() < length)) { ReadSlow(src, length, dest); return; } std::memcpy(dest, chunk.data(), length); absl::Cord::Advance(&src, length); } inline std::optional CordIteratorSpan::TryFlat() const { if (length_ == 0) return absl::string_view(); absl::string_view chunk = absl::Cord::ChunkRemaining(*iterator_); if (ABSL_PREDICT_FALSE(chunk.size() < length_)) return std::nullopt; return chunk.substr(0, length_); } } // namespace riegeli #endif // RIEGELI_BASE_CORD_ITERATOR_SPAN_H_ ================================================ FILE: riegeli/base/cord_utils.cc ================================================ // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "riegeli/base/cord_utils.h" #include #include #include #include "absl/base/nullability.h" #include "absl/strings/cord.h" #include "absl/strings/cord_buffer.h" #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/string_utils.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli::cord_internal { void CopyCordToArray(const absl::Cord& src, char* absl_nullable dest) { for (const absl::string_view fragment : src.Chunks()) { std::memcpy(dest, fragment.data(), fragment.size()); dest += fragment.size(); } } absl::Cord MakeBlockyCord(absl::string_view src) { absl::Cord dest; AppendToBlockyCord(src, dest); return dest; } void AssignToBlockyCord(absl::string_view src, absl::Cord& dest) { if (src.size() <= absl::CordBuffer::kDefaultLimit) { dest = src; return; } dest.Clear(); AppendToBlockyCord(src, dest); } void AppendToBlockyCord(absl::string_view src, absl::Cord& dest) { if (src.empty()) return; { absl::CordBuffer buffer = dest.GetAppendBuffer(0, 1); const size_t existing_length = buffer.length(); if (existing_length > 0) { buffer.SetLength( UnsignedMin(existing_length + src.size(), buffer.capacity())); std::memcpy(buffer.data() + existing_length, src.data(), buffer.length() - existing_length); src.remove_prefix(buffer.length() - existing_length); dest.Append(std::move(buffer)); if (src.empty()) return; } } do { absl::CordBuffer buffer = absl::CordBuffer::CreateWithCustomLimit( kCordBufferBlockSize, src.size()); buffer.SetLength(UnsignedMin(src.size(), buffer.capacity())); 
std::memcpy(buffer.data(), src.data(), buffer.length()); src.remove_prefix(buffer.length()); dest.Append(std::move(buffer)); } while (!src.empty()); } void PrependToBlockyCord(absl::string_view src, absl::Cord& dest) { while (!src.empty()) { absl::CordBuffer buffer = absl::CordBuffer::CreateWithCustomLimit( kCordBufferBlockSize, src.size()); buffer.SetLength(UnsignedMin(src.size(), buffer.capacity())); std::memcpy(buffer.data(), src.data() + src.size() - buffer.length(), buffer.length()); src.remove_suffix(buffer.length()); dest.Prepend(std::move(buffer)); } } } // namespace riegeli::cord_internal ================================================ FILE: riegeli/base/cord_utils.h ================================================ // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_CORD_UTILS_H_ #define RIEGELI_BASE_CORD_UTILS_H_ #include #include #include "absl/base/nullability.h" #include "absl/numeric/bits.h" #include "absl/strings/cord.h" #include "absl/strings/cord_buffer.h" #include "absl/strings/string_view.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/buffering.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli::cord_internal { // `absl::cord_internal::kFlatOverhead`. Does not have to be accurate. inline constexpr size_t kFlatOverhead = sizeof(size_t) + sizeof(uint32_t) + sizeof(uint8_t); // The `block_size` parameter for `absl::CordBuffer::CreateWithCustomLimit()`. 
inline constexpr size_t kCordBufferBlockSize = UnsignedMin(kDefaultMaxBlockSize, absl::CordBuffer::kCustomLimit); // Maximum usable size supported by `absl::CordBuffer`. inline constexpr size_t kCordBufferMaxSize = absl::CordBuffer::MaximumPayload(kCordBufferBlockSize); // When deciding whether to copy an array of bytes or share memory to an // `absl::Cord`, prefer copying up to this length when creating a new // `absl::Cord`. // // This is `absl::cord_internal::kMaxInline`. Does not have to be accurate. inline constexpr size_t kMaxBytesToCopyToEmptyCord = 15; // When deciding whether to copy an array of bytes or share memory to an // `absl::Cord`, prefer copying up to this length when appending to a non-empty // `absl::Cord`. // // This is `absl::cord_internal::kMaxBytesToCopy`. Does not have to be accurate. inline constexpr size_t kMaxBytesToCopyToNonEmptyCord = 511; // When deciding whether to copy an array of bytes or share memory to an // `absl::Cord`, prefer copying up to this length when appending to `dest`. // // `absl::Cord::Append(absl::Cord)` chooses to copy bytes from a source up to // this length, so it is better to avoid constructing the source as `absl::Cord` // if it will not be shared anyway. inline size_t MaxBytesToCopyToCord(absl::Cord& dest) { if (dest.empty()) return kMaxBytesToCopyToEmptyCord; return kMaxBytesToCopyToNonEmptyCord; } // Copies `src` to `dest[]`. // // `dest[]` must have sufficient size for `src.size()`, and `dest` may be // `nullptr` only if `src.empty()`. void CopyCordToArray(const absl::Cord& src, char* absl_nullable dest); // Variants of `absl::Cord` operations with different block sizing tradeoffs: // * `MakeBlockyCord(src)` is like `absl::Cord(src)`. // * `AssignToBlockyCord(src, dest)` is like `dest = src`. // * `AppendToBlockyCord(src, dest)` is like `dest.Append(src)`. // * `PrependToBlockyCord(src, dest)` is like `dest.Prepend(src)`. 
// // They avoid splitting `src` into 4083-byte fragments and avoid overallocation, // without guarantees. absl::Cord MakeBlockyCord(absl::string_view src); void AssignToBlockyCord(absl::string_view src, absl::Cord& dest); void AppendToBlockyCord(absl::string_view src, absl::Cord& dest); void PrependToBlockyCord(absl::string_view src, absl::Cord& dest); // Returns usable size provided by `absl::CordBuffer::CreateWithCustomLimit()` // called with `kCordBufferBlockSize` and `capacity`. Does not have to be // accurate. inline size_t CordBufferSizeForCapacity(size_t capacity) { if (capacity >= kCordBufferMaxSize) return kCordBufferMaxSize; if (capacity <= absl::CordBuffer::kDefaultLimit) return capacity; if (!absl::has_single_bit(capacity)) { static constexpr size_t kMaxPageSlop = 128; const size_t rounded_up = size_t{1} << absl::bit_width(capacity - 1); const size_t slop = rounded_up - capacity; if (slop >= kFlatOverhead && slop <= kMaxPageSlop + kFlatOverhead) { capacity = rounded_up; } else { const size_t rounded_down = size_t{1} << (absl::bit_width(capacity) - 1); capacity = rounded_down; } } return capacity - kFlatOverhead; } } // namespace riegeli::cord_internal #endif // RIEGELI_BASE_CORD_UTILS_H_ ================================================ FILE: riegeli/base/debug.cc ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#include "riegeli/base/debug.h" #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/strings/cord.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { namespace { inline void WriteHex1(uint8_t src, DebugStream& dest) { dest.Write(static_cast(src + (src < 10 ? '0' : 'a' - 10))); } inline void WriteHex2(uint8_t src, DebugStream& dest) { WriteHex1(static_cast(src >> 4), dest); WriteHex1(static_cast(src & 0x0f), dest); } inline void WriteHex4(uint16_t src, DebugStream& dest) { WriteHex2(static_cast(src >> 8), dest); WriteHex2(static_cast(src & 0xff), dest); } inline void WriteHex8(uint32_t src, DebugStream& dest) { WriteHex4(static_cast(src >> 16), dest); WriteHex4(static_cast(src & 0xffff), dest); } template void WriteChar(CharType src, DebugStream& dest) { if (src >= 32 && src <= 126) { if (src == quote || src == '\\') dest.Write('\\'); dest.Write(static_cast(src)); return; } switch (src) { case '\t': dest.Write("\\t"); break; case '\n': dest.Write("\\n"); break; case '\r': dest.Write("\\r"); break; default: { const auto unsigned_src = static_cast(src); if (unsigned_src <= 0xff) { dest.Write("\\x{"); WriteHex2(static_cast(unsigned_src), dest); } else { dest.Write("\\u{"); if (unsigned_src <= 0xffff) { WriteHex4(static_cast(unsigned_src), dest); } else { WriteHex8(unsigned_src, dest); } } dest.Write('}'); break; } } } template void WriteQuotedChar(CharType src, DebugStream& dest) { dest.Write('\''); WriteChar<'\'', IntType>(src, dest); dest.Write('\''); } template void WriteQuotedString(absl::Span src, DebugStream& dest) { dest.Write('"'); for (const CharType ch : src) { WriteChar<'"', IntType>(ch, dest); } dest.Write('"'); } } // namespace void DebugStream::DebugStringFragment(absl::string_view src) { for (const char ch : src) { WriteChar<'"', uint8_t>(ch, *this); } } void RiegeliDebug(bool src, DebugStream& dest) { dest.Write(src ? 
absl::string_view("true") : absl::string_view("false")); } void RiegeliDebug(char src, DebugStream& dest) { WriteQuotedChar(src, dest); } void RiegeliDebug(wchar_t src, DebugStream& dest) { WriteQuotedChar>( src, dest); } #if __cpp_char8_t void RiegeliDebug(char8_t src, DebugStream& dest) { WriteQuotedChar(src, dest); } #endif // __cpp_char8_t void RiegeliDebug(char16_t src, DebugStream& dest) { WriteQuotedChar(src, dest); } void RiegeliDebug(char32_t src, DebugStream& dest) { WriteQuotedChar(src, dest); } void RiegeliDebug(absl::string_view src, DebugStream& dest) { WriteQuotedString(absl::MakeConstSpan(src), dest); } void RiegeliDebug(std::wstring_view src, DebugStream& dest) { WriteQuotedString< std::conditional_t>( absl::MakeConstSpan(src), dest); } #if __cpp_char8_t void RiegeliDebug(std::u8string_view src, DebugStream& dest) { WriteQuotedString(absl::MakeConstSpan(src), dest); } #endif // __cpp_char8_t void RiegeliDebug(std::u16string_view src, DebugStream& dest) { WriteQuotedString(absl::MakeConstSpan(src), dest); } void RiegeliDebug(std::u32string_view src, DebugStream& dest) { WriteQuotedString(absl::MakeConstSpan(src), dest); } void RiegeliDebug(const absl::Cord& src, DebugStream& dest) { dest.DebugStringQuote(); for (const absl::string_view fragment : src.Chunks()) { dest.DebugStringFragment(fragment); } dest.DebugStringQuote(); } void RiegeliDebug(const void* absl_nullable src, DebugStream& dest) { if (src == nullptr) { dest.Write("nullptr"); } else { dest << src; } } void RiegeliDebug(ABSL_ATTRIBUTE_UNUSED std::nullptr_t src, DebugStream& dest) { dest.Write("nullptr"); } void RiegeliDebug(ABSL_ATTRIBUTE_UNUSED std::nullopt_t src, DebugStream& dest) { dest.Write("nullopt"); } } // namespace riegeli ================================================ FILE: riegeli/base/debug.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except 
in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_DEBUG_H_ #define RIEGELI_BASE_DEBUG_H_ #include #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/strings/cord.h" #include "absl/strings/has_absl_stringify.h" #include "absl/strings/string_view.h" #include "riegeli/base/stream_utils.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { class DebugStream; namespace debug_internal { template struct HasRiegeliDebug : std::false_type {}; template struct HasRiegeliDebug< T, std::void_t(), std::declval()))>> : std::true_type {}; template struct HasDebugString : std::false_type {}; template struct HasDebugString< T, std::enable_if_t().DebugString()), absl::string_view>>> : std::true_type {}; template struct HasOperatorOutput : std::false_type {}; template struct HasOperatorOutput() << std::declval())>> : std::true_type {}; } // namespace debug_internal // `SupportsDebug::value` is `true` if `T` supports `riegeli::Debug()`: // writing the value in a format suitable for error messages. // // The value is generally written in a way which reflects as much as is compared // by `operator==`, without indicating the type nor internal structure, using // syntax similar to C++ expressions. 
template struct SupportsDebug : std::disjunction< debug_internal::HasRiegeliDebug, debug_internal::HasDebugString, absl::HasAbslStringify, debug_internal::HasOperatorOutput> {}; // To customize `riegeli::Debug()` for a class `T`, define a free function // `friend void RiegeliDebug(const T& src, DebugStream& dest)` as a friend of // `T` inside class definition or in the same namespace as `T`, so that it can // be found via ADL. `DebugStream` in the parameter type can also be a template // parameter to reduce library dependencies. // // `riegeli::Debug(src)` uses the first defined form among the following: // * `RiegeliDebug(src, dest)` // * `src.DebugString()` // * `dest << src` // * `AbslStringify(dest, src)` class DebugStream { public: // Will write to `dest`. explicit DebugStream(std::ostream* dest ABSL_ATTRIBUTE_LIFETIME_BOUND) : dest_(dest) {} DebugStream(const DebugStream& that) = default; DebugStream& operator=(const DebugStream& that) = default; // Writes a character using `std::ostream::write()`. void Write(char src) { dest_->write(&src, 1); } // Writes a string using `std::ostream::write()`. void Write(absl::string_view src) { dest_->write(src.data(), static_cast(src.size())); } // Writes a value formatted using `operator<<`. // // Using stream manipulators is supported, but if the stream state is not // reset to the default before calling `Debug()`, then the results can be // inconsistent, depending on the type being written. template DebugStream& operator<<(T&& src) { *dest_ << std::forward(src); return *this; } // Writes a value in a format suitable for error messages. This calls the // first defined form among the following: // * `RiegeliDebug(src, *this)` // * `Write(src.DebugString())` // * `AbslStringify(sink, src)` for `OStreamStringifySink(dest)` // * `*dest << src` // // This is used to implement `riegeli::Debug()`, and to write subobjects by // implementations of `RiegeliDebug()` for objects containing them. 
template ::value, int> = 0> void Debug(const T& src) { if constexpr (debug_internal::HasRiegeliDebug::value) { RiegeliDebug(src, *this); } else if constexpr (debug_internal::HasDebugString::value) { Write(src.DebugString()); } else if constexpr (debug_internal::HasOperatorOutput::value) { *dest_ << src; } else { static_assert(absl::HasAbslStringify::value); OStreamStringifySink sink(dest_); AbslStringify(sink, src); } } // To implement `RiegeliDebug()` for string-like types which are not // represented as one fragment, the following pattern can be used: // // ``` // dest.DebugStringQuote(); // for (const absl::string_view fragment : fragments) { // dest.DebugStringFragment(fragment); // } // dest.DebugStringQuote(); // ``` // // If the representation is always flat, relying on `Debug()` for // `absl::string_view` is sufficient. void DebugStringQuote() { Write('"'); } void DebugStringFragment(absl::string_view src); private: std::ostream* dest_; }; // The following overloads cover supported types which do not define // `RiegeliDebug()` themselves. // `bool` is written as `true` or `false`. void RiegeliDebug(bool src, DebugStream& dest); // Non-bool and non-character numeric types, including `signed char` and // `unsigned char`, are written as numbers. 
inline void RiegeliDebug(signed char src, DebugStream& dest) { dest << int{src}; } inline void RiegeliDebug(unsigned char src, DebugStream& dest) { dest << unsigned{src}; } inline void RiegeliDebug(short src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(unsigned short src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(int src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(unsigned src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(long src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(unsigned long src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(long long src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(unsigned long long src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(float src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(double src, DebugStream& dest) { dest << src; } inline void RiegeliDebug(long double src, DebugStream& dest) { dest << src; } // Character types are written in C++ character literal format. void RiegeliDebug(char src, DebugStream& dest); void RiegeliDebug(wchar_t src, DebugStream& dest); #if __cpp_char8_t void RiegeliDebug(char8_t src, DebugStream& dest); #endif void RiegeliDebug(char16_t src, DebugStream& dest); void RiegeliDebug(char32_t src, DebugStream& dest); // Enumeration types are written like their underlying types. template , int> = 0> void RiegeliDebug(T src, DebugStream& dest) { dest.Debug(static_cast>(src)); } // `absl::string_view` is written in C++ string literal format. // // This covers types implicitly convertible to `absl::string_view` like // `std::string` and `CompactString`. 
void RiegeliDebug(absl::string_view src, DebugStream& dest); void RiegeliDebug(std::wstring_view src, DebugStream& dest); #if __cpp_char8_t void RiegeliDebug(std::u8string_view src, DebugStream& dest); #endif void RiegeliDebug(std::u16string_view src, DebugStream& dest); void RiegeliDebug(std::u32string_view src, DebugStream& dest); // `absl::Cord` is written in C++ string literal format. void RiegeliDebug(const absl::Cord& src, DebugStream& dest); // A null pointer is written as "nullptr". Other data pointers, including char // pointers, as well as function pointers, are written using `operator<<` for // `const void*`. void RiegeliDebug(std::nullptr_t src, DebugStream& dest); void RiegeliDebug(const void* absl_nullable src, DebugStream& dest); template , int> = 0> void RiegeliDebug(T* absl_nullable src, DebugStream& dest) { dest.Debug(reinterpret_cast(src)); } // `std::unique_ptr` and `std::shared_ptr` are written like pointers. template void RiegeliDebug(const absl_nullable std::unique_ptr& src, DebugStream& dest) { dest.Debug(src.get()); } template void RiegeliDebug(const absl_nullable std::shared_ptr& src, DebugStream& dest) { dest.Debug(src.get()); } // `std::optional` values are written as "nullopt" when absent, or as the // underlying data wrapped in braces when present. void RiegeliDebug(std::nullopt_t src, DebugStream& dest); template ::value, int> = 0> void RiegeliDebug(const std::optional& src, DebugStream& dest) { if (src == std::nullopt) { dest.Debug(std::nullopt); } else { dest.Write('{'); dest.Debug(*src); dest.Write('}'); } } // The type returned by `riegeli::Debug()`. 
template class DebugType { public: template , int> = 0> explicit DebugType(const T& src) : src_(src) {} template , int> = 0> explicit DebugType(T&& src) : src_(std::forward(src)) {} DebugType(const DebugType& that) = default; DebugType& operator=(const DebugType& that) = default; DebugType(DebugType&& that) = default; DebugType& operator=(DebugType&& that) = default; template friend void AbslStringify(Sink& dest, const DebugType& src) { StringifyOStream stream(&dest); DebugStream(&stream).Debug(src.src_); } // Faster implementation if `Sink` is `OStreamStringifySink`. friend void AbslStringify(OStreamStringifySink& dest, const DebugType& src) { DebugStream(dest.dest()).Debug(src.src_); } friend std::ostream& operator<<(std::ostream& dest, const DebugType& src) { DebugStream(&dest).Debug(src.src_); return dest; } std::string ToString() const { std::string dest; StringOStream stream(&dest); DebugStream(&stream).Debug(src_); return dest; } private: T src_; }; template explicit DebugType(T&& src) -> DebugType>; // `riegeli::Debug()` wraps an object such that it is formatted using // `DebugStream::Debug()` when explicitly converted to `std::string` or written // using `AbslStringify()` or `operator<<`. // // `riegeli::Debug()` does not own the object, even if it involves temporaries, // hence it should be stringified by the same expression which constructed it, // so that the temporaries outlive its usage. For storing a `DebugType` in a // variable or returning it from a function, construct `DebugType` directly. 
template ::value, int> = 0> inline DebugType Debug(const T& src ABSL_ATTRIBUTE_LIFETIME_BOUND) { return DebugType(src); } } // namespace riegeli #endif // RIEGELI_BASE_DEBUG_H_ ================================================ FILE: riegeli/base/dependency.h ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_DEPENDENCY_H_ #define RIEGELI_BASE_DEPENDENCY_H_ #include #include #include #include "absl/base/attributes.h" #include "absl/meta/type_traits.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "riegeli/base/assert.h" #include "riegeli/base/bytes_ref.h" #include "riegeli/base/compare.h" #include "riegeli/base/dependency_manager.h" #include "riegeli/base/initializer.h" #include "riegeli/base/type_traits.h" namespace riegeli { // `Dependency` stores or refers to an optionally owned object // which is stored as type `Manager` and accessed through type `Handle`. // // When a dependent object is said to be owned by a host object or function, the // host is responsible for closing it when done, and certain other operations // are propagated to it. The host is usually also responsible for destroying the // owned object. // // Often `Handle` is some pointer `Base*`, and then `Manager` can be e.g. // `T` (owned), `T*` (not owned), `std::unique_ptr` (owned), // or `Any` (maybe owned), with some `T` derived from `Base`. 
// // Often `Dependency` is a member of a host class template // parameterized by `Manager`, with `Handle` fixed by the host class. The member // is initialized from an argument of a constructor or a resetting function. // A user of the host class specifies ownership of the dependent object and // possibly narrows its type by choosing the `Manager` template argument of the // host class. The `Manager` type can be deduced from a constructor argument // using CTAD, which is usually done by removing any toplevel references and // `const` qualifiers using `std::decay`. // // As an alternative to passing `std::move(manager)`, passing // `ClosingPtr(&manager)` avoids moving `manager`, but the caller must ensure // that the dependent object is valid while the host object needs it. // // `Manager` can also be `T&` (not owned) or `T&&` (owned). They are primarily // meant to be used with a host function rather than a host object, because such // a dependency stores only a reference to the dependent object. By convention a // reference argument is expected to be valid for the duration of the function // call but not necessarily after the function returns. The `Manager` type is // usually deduced from a function argument as a reference type rather than // using `std::decay`. // // `Manager` being `T&` is functionally equivalent to `T*`, but offers a more // idiomatic API for passing an object which does not need to be valid after the // function returns. // // `Manager` being `T&&` is similar to `ClosingPtrType`. In contrast to a // host class, a host function does not decay `T&&` to `T` and avoids moving // the `Manager`, because the dependent object can be expected to be valid for // the duration of the function call. // `Dependency` derives from `DependencyImpl` // which has specializations for various combinations of `Handle` and `Manager` // types. Some operations of `Dependency` are provided by `DependencyImpl`, // others are added by `Dependency` in a uniform way. 
// // `DependencyImpl` specializations often derive from // `DependencyManager` or `DependencyBase`. // // `DependencyManager` provides a preliminary interpretation of // `Manager` independently from `Handle`. This interpretation is then refined by // `DependencyImpl`. // Operations of `Dependency`: // // ``` // // Constructs a dummy `Manager` from // // `RiegeliDependencySentinel(static_cast(nullptr))`. Used // // when the host object is closed and does not need a dependent object. // // // // Supported optionally. // // // // Provided by `DependencyBase` and explicitly inherited. // Dependency(); // // // Copies or moves a `Manager`. Used to specify the initial value of the // // dependent object. // // // // Provided by `DependencyBase` and explicitly inherited. // explicit Dependency(Initializer manager); // // // Copies the dependency. // // // // Supported optionally. // Dependency(const Dependency& that) noexcept; // Dependency& operator=(const Dependency& that) noexcept; // // // Moves the dependency. // // // // Supported optionally. // Dependency(Dependency&& that) noexcept; // Dependency& operator=(Dependency&& that) noexcept; // // // Makes `*this` equivalent to a newly constructed Dependency. This avoids // // constructing a temporary Dependency and moving from it. // // // // The overload with no parameters is supported when the corresponding // // constructor is supported. // // // // Provided by `DependencyBase`. // ABSL_ATTRIBUTE_REINITIALIZES void Reset(); // ABSL_ATTRIBUTE_REINITIALIZES void Reset(Initializer manager); // // // Exposes the stored `Manager`. // // // // Provided by `DependencyBase` or `DependencyImpl`. // Manager& manager(); // const Manager& manager() const; // // // The type returned by `get()`. // // // // Provided by `Dependency`, not `DependencyImpl`. // using Subhandle = ...; // // // Returns a `Handle` to the `Manager`. 
//   //
//   // `get()` might return a subtype of `Handle` which retains more static
//   // type information about `Manager`, e.g. a pointer to a class derived
//   // from what `Handle` points to, or a class derived from `Handle`.
//   //
//   // The result is non-const even if the `Manager` is stored inside the
//   // `Dependency`.
//   //
//   // Provided by `DependencyImpl`.
//   Handle get() const;
//
//   // If `Handle` is `Base*` or another dereferenceable type, `Dependency`
//   // can be used as a smart pointer to `Base`, for convenience.
//   //
//   // Provided by `Dependency`, not `DependencyImpl`.
//   Base& operator*() const { return *get(); }
//   Base* operator->() const { return get(); }
//
//   // If `Handle` is `Base*` or another type comparable against `nullptr`,
//   // `Dependency` can be compared against `nullptr`.
//   //
//   // Provided by `Dependency`, not `DependencyImpl`.
//   friend bool operator==(const Dependency& a, std::nullptr_t) {
//     return a.get() == nullptr;
//   }
//
//   // If `true`, the `Dependency` owns the dependent object, i.e. closing
//   // the host object should close the dependent object.
//   //
//   // Provided by `DependencyManagerImpl`, `DependencyImpl`, or
//   // `Dependency`. In `Dependency` implemented in terms of `kIsOwning`.
//   bool IsOwning() const;
//
//   // The value of `IsOwning()` if known statically or mostly statically.
//   //
//   // This constant is optional.
//   //
//   // If `IsOwning()` returns a statically known constant, `kIsOwning`
//   // should be defined. `Dependency` will provide `IsOwning()`.
//   //
//   // If `IsOwning()` returns `true` except for a sentinel value like
//   // `nullptr`, e.g. for `std::unique_ptr`, `kIsOwning` can still be
//   // defined in addition to `IsOwning()`. This allows to use the static
//   // approximation when static selection is needed, with the caveat that
//   // it will return `true` also for the sentinel value.
//   //
//   // Provided by `DependencyManagerImpl` or `DependencyImpl`.
// static constexpt bool kIsOwning; // // // If `true`, `get()` stays unchanged when a `Dependency` is moved. // // // // This can be used as an optimization to avoid recomputing values derived // // from them when a `Dependency` is moved. // // // // Provided by `DependencyBase`, `DependencyManagerImpl`, or // // `DependencyImpl`. // static constexpr bool kIsStable; // ``` // `DependencyImpl` specializations provide what `DependencyBase` provides // (constructors, `Reset()`, `manager()`, and `kIsStable`), and also `get()`, // `IsOwning()`, and `kIsOwning`. // This template is specialized but does not have a primary definition. template class DependencyImpl; // Specialization of `DependencyImpl` when // `DependencyManagerPtr` is a pointer convertible to `T*`. template class DependencyImpl< T*, Manager, std::enable_if_t>, std::is_same, std::nullptr_t>>, std::is_convertible, T*>>>> : public DependencyManager { public: using DependencyImpl::DependencyManager::DependencyManager; DependencyManagerPtr get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->ptr(); } protected: DependencyImpl(const DependencyImpl& that) = default; DependencyImpl& operator=(const DependencyImpl& that) = default; DependencyImpl(DependencyImpl&& that) = default; DependencyImpl& operator=(DependencyImpl&& that) = default; ~DependencyImpl() = default; }; // Specialization of `DependencyImpl, Manager>` when // `DependencyManagerRef` is explicitly convertible to `absl::Span`. // // Specialized separately for `get()` to return // `absl::Span>` if possible. template class DependencyImpl< absl::Span, Manager, std::enable_if_t>, std::is_constructible, DependencyManagerRef>>>> : public DependencyManager { public: using DependencyImpl::DependencyManager::DependencyManager; // Return `absl::Span>` when // `DependencyManagerRef` is convertible to it. 
template >, DependencyManagerRef>, int> = 0> absl::Span> get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return absl::Span>(*this->ptr()); } template >, DependencyManagerRef>, int> = 0> absl::Span get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return absl::Span(*this->ptr()); } static constexpr bool kIsStable = DependencyImpl::DependencyManager::kIsStable || std::is_same_v> || std::is_same_v>>; protected: DependencyImpl(const DependencyImpl& that) = default; DependencyImpl& operator=(const DependencyImpl& that) = default; DependencyImpl(DependencyImpl&& that) = default; DependencyImpl& operator=(DependencyImpl&& that) = default; ~DependencyImpl() = default; }; // Specialization of `DependencyImpl, Manager>` when // `DependencyManagerPtr` is `absl::Span` or // `absl::Span>`. // // Specialized separately for `get()` to return // `absl::Span>` if possible. template class DependencyImpl< absl::Span, Manager, std::enable_if_t, absl::Span>, std::is_same, absl::Span>>>>> : public DependencyManager { public: using DependencyImpl::DependencyManager::DependencyManager; DependencyManagerPtr get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->ptr(); } protected: DependencyImpl(const DependencyImpl& that) = default; DependencyImpl& operator=(const DependencyImpl& that) = default; DependencyImpl(DependencyImpl&& that) = default; DependencyImpl& operator=(DependencyImpl&& that) = default; ~DependencyImpl() = default; }; // Specialization of `DependencyImpl` when // `DependencyManagerRef` is convertible to `BytesRef`. 
template class DependencyImpl< absl::string_view, Manager, std::enable_if_t>, std::is_convertible, BytesRef>>>> : public DependencyManager { public: using DependencyImpl::DependencyManager::DependencyManager; absl::string_view get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return BytesRef(*this->ptr()); } static constexpr bool kIsStable = DependencyImpl::DependencyManager::kIsStable || std::is_same_v || std::is_same_v> || std::is_same_v>; protected: DependencyImpl(const DependencyImpl& that) = default; DependencyImpl& operator=(const DependencyImpl& that) = default; DependencyImpl(DependencyImpl&& that) = default; DependencyImpl& operator=(DependencyImpl&& that) = default; ~DependencyImpl() = default; }; // Specialization of `DependencyImpl` when // `DependencyManagerPtr` is `absl::Span` or // `absl::Span`. // // Specialized separately because `absl::Span` is not convertible // to `absl::string_view` in the regular way. template class DependencyImpl< absl::string_view, Manager, std::enable_if_t, absl::Span>, std::is_same, absl::Span>>>> : public DependencyManager { public: using DependencyImpl::DependencyManager::DependencyManager; absl::string_view get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { const absl::Span span = this->ptr(); return absl::string_view(span.data(), span.size()); } protected: DependencyImpl(const DependencyImpl& that) = default; DependencyImpl& operator=(const DependencyImpl& that) = default; DependencyImpl(DependencyImpl&& that) = default; DependencyImpl& operator=(DependencyImpl&& that) = default; ~DependencyImpl() = default; }; namespace dependency_internal { // `SupportsDependencyImpl::value` is `true` when // `DependencyImpl` is defined. 
template struct SupportsDependencyImpl : std::false_type {}; template struct SupportsDependencyImpl< Handle, Manager, std::void_t< decltype(std::declval&>().get())>> : std::true_type {}; // `DependencyDefault` extends // `DependencyImpl` with the basic cases when // `DependencyManagerRef` or `DependencyManagerPtr` is // explicitly convertible to `Handle`. // This template is specialized but does not have a primary definition. template class DependencyDefault; // Specialization of `DependencyDefault` when // `DependencyImpl` is defined: delegate to it. template class DependencyDefault< Handle, Manager, std::enable_if_t::value>> : public DependencyImpl { public: using DependencyDefault::DependencyImpl::DependencyImpl; static_assert( std::is_convertible_v< decltype(std::declval< const typename DependencyDefault::DependencyImpl&>() .get()), Handle>, "DependencyImpl::get() must return a subtype of Handle"); protected: DependencyDefault(const DependencyDefault& that) = default; DependencyDefault& operator=(const DependencyDefault& that) = default; DependencyDefault(DependencyDefault&& that) = default; DependencyDefault& operator=(DependencyDefault&& that) = default; ~DependencyDefault() = default; }; // Specialization of `DependencyDefault` when // `DependencyImpl` is not defined and // `DependencyManagerRef` is explicitly convertible to `Handle`: // let `get()` return `*ptr()`, as its original type if possible. template class DependencyDefault< Handle, Manager, std::enable_if_t>, std::is_pointer>, std::is_constructible>>>> : public DependencyManager { public: using DependencyDefault::DependencyManager::DependencyManager; // Return `DependencyManagerRef` when it is a subclass of `Handle`. 
template < typename DependentManager = Manager, std::enable_if_t, Handle*>, int> = 0> DependencyManagerRef get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return *this->ptr(); } template < typename DependentManager = Manager, std::enable_if_t, Handle*>, int> = 0> Handle get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return Handle(*this->ptr()); } static constexpr bool kIsStable = DependencyDefault::DependencyManager::kIsStable || std::is_convertible_v, Handle*>; protected: DependencyDefault(const DependencyDefault& that) = default; DependencyDefault& operator=(const DependencyDefault& that) = default; DependencyDefault(DependencyDefault&& that) = default; DependencyDefault& operator=(DependencyDefault&& that) = default; ~DependencyDefault() = default; }; // Specialization of `DependencyDefault` when // `DependencyImpl` is not defined, // `DependencyManagerRef` is not convertible to `Handle`, and // `DependencyManagerPtr` is explicitly convertible to `Handle`: // let `get()` return `ptr()`, as its original type if possible. template class DependencyDefault< Handle, Manager, std::enable_if_t>, std::negation>, std::is_constructible>>>, std::is_constructible>>>> : public DependencyManager { public: using DependencyDefault::DependencyManager::DependencyManager; // Return `DependencyManagerPtr` when it is a subclass of `Handle`. 
template < typename DependentManager = Manager, std::enable_if_t*, Handle*>, int> = 0> DependencyManagerPtr get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->ptr(); } template < typename DependentManager = Manager, std::enable_if_t*, Handle*>, int> = 0> Handle get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return Handle(this->ptr()); } static constexpr bool kIsStable = DependencyDefault::DependencyManager::kIsStable || std::is_convertible_v*, Handle*>; protected: DependencyDefault(const DependencyDefault& that) = default; DependencyDefault& operator=(const DependencyDefault& that) = default; DependencyDefault(DependencyDefault&& that) = default; DependencyDefault& operator=(DependencyDefault&& that) = default; ~DependencyDefault() = default; }; // `SupportsDependencyDefault::value` is `true` when // `DependencyDefault` is defined. template struct SupportsDependencyDefault : std::disjunction< dependency_internal::SupportsDependencyImpl, std::conjunction< std::is_pointer>, std::is_constructible>>, std::is_constructible>> {}; // `DependencyDeref` extends // `DependencyDefault` with cases where `Manager` is // a reference, if `DependencyImpl` is not defined. // // If `DependencyImpl` uses `DependencyManager`, then // this is already covered. Custom specializations might not cover this. // This template is specialized but does not have a primary definition. template class DependencyDeref; // Specialization of `DependencyDeref` when // `DependencyDefault` is defined: delegate to it. 
template class DependencyDeref< Handle, Manager, std::enable_if_t::value>> : public DependencyDefault { public: using DependencyDeref::DependencyDefault::DependencyDefault; protected: DependencyDeref(const DependencyDeref& that) = default; DependencyDeref& operator=(const DependencyDeref& that) = default; DependencyDeref(DependencyDeref&& that) = default; DependencyDeref& operator=(DependencyDeref&& that) = default; ~DependencyDeref() = default; }; // Specialization of `DependencyDeref` when // `DependencyDefault` is not defined, // `Manager` is a reference, and // `DependencyDefault>` is defined: // delegate to the latter. template class DependencyDeref< Handle, Manager, std::enable_if_t, std::negation>, SupportsDependencyDefault>>>> : public DependencyDefault> { public: using DependencyDeref::DependencyDefault::DependencyDefault; protected: DependencyDeref(const DependencyDeref& that) = default; DependencyDeref& operator=(const DependencyDeref& that) = default; DependencyDeref(DependencyDeref&& that) = default; DependencyDeref& operator=(DependencyDeref&& that) = default; ~DependencyDeref() = default; }; // `SupportsDependencyDeref::value` is `true` when // `DependencyDeref` is defined. template struct SupportsDependencyDeref : std::disjunction< SupportsDependencyDefault, std::conjunction, SupportsDependencyDefault< Handle, absl::remove_cvref_t>>> {}; } // namespace dependency_internal // `SupportsDependency::value` is `true` when // `Dependency` is defined and usable, i.e. constructible from // `Initializer`. // // An immovable `Manager` is usable when the `Initializer` has been // constructed from `riegeli::Maker()` or `riegeli::Invoker()`, not from an // already constructed object. template struct SupportsDependency : std::conjunction< dependency_internal::SupportsDependencyDeref> {}; // `TargetSupportsDependency::value` is `true` when // `Dependency>` is defined and constructible from // `Manager&&`. 
// // An immovable `TargetT` is usable when the `Dependency` has been // initialized with `riegeli::Maker()` or `riegeli::Invoker()`, possibly behind // `Initializer`, not from an already constructed object. template struct TargetSupportsDependency : std::conjunction< SupportsDependency>, std::is_convertible>>> {}; // `TargetRefSupportsDependency::value` is `true` when // `DependencyRef` i.e. // `Dependency>` is defined and constructible from // `Manager&&`. // // An immovable `TargetRefT` is usable when the `Dependency` has been // initialized with `riegeli::Maker()` or `riegeli::Invoker()`, possibly behind // `Initializer`, not from an already constructed object. template struct TargetRefSupportsDependency : std::conjunction< SupportsDependency>, std::is_convertible>>> {}; namespace dependency_internal { template struct IsConstexprBool : std::true_type {}; } // namespace dependency_internal // `HasStaticIsOwning::value` is `true` if `T` defines // `static constexpr bool kIsOwning`. template struct HasStaticIsOwning : std::false_type {}; template struct HasStaticIsOwning< T, std::enable_if_t::value>> : std::true_type {}; // Deriving a class from `PropagateStaticIsOwning` defines // `static constexpr bool kIsOwning = T::kIsOwning` if `T` defines `kIsOwning`. template class PropagateStaticIsOwning {}; template class PropagateStaticIsOwning::value>> { public: static constexpr bool kIsOwning = T::kIsOwning; }; namespace dependency_internal { template struct HasDynamicIsOwning : std::false_type {}; template struct HasDynamicIsOwning< T, std::enable_if_t().IsOwning()), bool>>> : std::true_type {}; // `DependencyDerived` adds `Dependency` and `StableDependency` operations // uniformly implemented in terms of other operations: `operator*`, // `operator->`, and comparisons against `nullptr`. 
// // It derives from the template parameter `Base` so that it can be used in // `Dependency` (applied to `DependencyDeref`) and `StableDependency` // (applied to `StableDependencyImpl`). template class DependencyDerived : public Base, public WithEqual> { public: using Base::Base; using Subhandle = decltype(std::declval().get()); template < typename DependentSubhandle = Subhandle, std::enable_if_t::value, int> = 0> decltype(*std::declval()) operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { Subhandle handle = this->get(); AssertNotNull(handle, "Failed precondition of Dependency::operator*: null handle"); return *std::move(handle); } template ::value, int> = 0> Subhandle operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { Subhandle handle = this->get(); AssertNotNull(handle, "Failed precondition of Dependency::operator->: null handle"); return handle; } template ::value, int> = 0> friend bool operator==(const DependencyDerived& a, std::nullptr_t) { return a.get() == nullptr; } template ::value, int> = 0> bool IsOwning() const { return Base::IsOwning(); } template < typename DependentBase = Base, std::enable_if_t< std::conjunction_v>, HasStaticIsOwning>, int> = 0> bool IsOwning() const { return Base::kIsOwning; } protected: DependencyDerived(const DependencyDerived& that) = default; DependencyDerived& operator=(const DependencyDerived& that) = default; DependencyDerived(DependencyDerived&& that) = default; DependencyDerived& operator=(DependencyDerived&& that) = default; ~DependencyDerived() = default; private: template ::value, int> = 0> static void AssertNotNull(Subhandle handle, absl::string_view message) { RIEGELI_ASSERT(handle != nullptr) << message; } template < typename DependentSubhandle = Subhandle, std::enable_if_t::value, int> = 0> static void AssertNotNull(ABSL_ATTRIBUTE_UNUSED Subhandle handle, ABSL_ATTRIBUTE_UNUSED absl::string_view message) {} }; } // namespace dependency_internal template class Dependency : public dependency_internal::DependencyDerived< 
dependency_internal::DependencyDeref, Handle, Manager> { public: using Dependency::DependencyDerived::DependencyDerived; Dependency(const Dependency& that) = default; Dependency& operator=(const Dependency& that) = default; Dependency(Dependency&& that) = default; Dependency& operator=(Dependency&& that) = default; }; // `DependencyRef` is an alias for // `Dependency>`. template using DependencyRef = Dependency>; namespace dependency_internal { // `AlwaysFalse::value` is `false`, but formally depends on `T...`. // This is useful for `static_assert()`. template struct AlwaysFalse : std::false_type {}; } // namespace dependency_internal // A placeholder `Dependency` manager to be deduced by CTAD, used to delete CTAD // for particular constructor argument types. // // It takes `ConstructorArgTypes` so that an error message from the // `static_assert()` can show them. template struct DeleteCtad { DeleteCtad() = delete; }; template class Dependency> { static_assert(dependency_internal::AlwaysFalse::value, "Template arguments must be written explicitly " "with these constructor argument types"); }; } // namespace riegeli #endif // RIEGELI_BASE_DEPENDENCY_H_ ================================================ FILE: riegeli/base/dependency_base.h ================================================ // Copyright 2018 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_DEPENDENCY_BASE_H_ #define RIEGELI_BASE_DEPENDENCY_BASE_H_ #include #include #include #include "absl/base/attributes.h" #include "riegeli/base/initializer.h" #include "riegeli/base/maker.h" #include "riegeli/base/reset.h" namespace riegeli { // `RiegeliDependencySentinel(T*)` specifies how to initialize a default // `Manager` (for `Dependency`) or `Handle` (for `Any`) of type `T`. // // To customize that for a class `T`, define a free function // `friend Result RiegeliDependencySentinel(T*)` as a friend of `T` inside class // definition or in the same namespace as `T`, so that it can be found via ADL. // // `RiegeliDependencySentinel(T*)` returns a value convertible to // `Initializer`, usually a `MakerType`. // // The argument of `RiegeliDependencySentinel(T*)` is always a null pointer, // used to choose the right overload based on the type. inline MakerType<> RiegeliDependencySentinel(void*) { return {}; } // Implementation shared between most specializations of `DependencyManagerImpl` // and `DependencyImpl` which store `manager()` in a member variable. // // `DependencyBase` provides constructors, `Reset()`, `manager()`, `kIsStable`, // and protected `mutable_manager()`. 
// NOTE(review): template parameter lists and several `enable_if` argument
// spans were stripped by text extraction (e.g. `template (nullptr)))`);
// non-comment tokens are preserved unchanged below.
// Primary `DependencyBase`: owns the `Manager` by value in `manager_`.
template
class DependencyBase {
 public:
  // Default constructor: available only when `RiegeliDependencySentinel()`
  // for `Manager` yields a value convertible to the required `Initializer`.
  template (nullptr))),
           Initializer>, int> = 0>
  DependencyBase() noexcept
      : DependencyBase(
            RiegeliDependencySentinel(static_cast(nullptr))) {}

  // Initializes the stored manager from `manager`.
  explicit DependencyBase(Initializer manager)
      : manager_(std::move(manager)) {}

  // `Reset()` without arguments: requires the sentinel to be available and
  // the manager to be move-assignable.
  template <
      typename DependentManager = Manager,
      std::enable_if_t<
          std::conjunction_v<
              std::is_convertible(nullptr))),
              Initializer>,
              std::is_move_assignable>,
          int> = 0>
  ABSL_ATTRIBUTE_REINITIALIZES void Reset() {
    Reset(RiegeliDependencySentinel(static_cast(nullptr)));
  }

  // Reassigns the stored manager from `manager`.
  template <
      typename DependentManager = Manager,
      std::enable_if_t, int> = 0>
  ABSL_ATTRIBUTE_REINITIALIZES void Reset(Initializer manager) {
    riegeli::Reset(manager_, std::move(manager));
  }

  // Accessors for the stored manager.
  Manager& manager() ABSL_ATTRIBUTE_LIFETIME_BOUND { return manager_; }
  const Manager& manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return manager_;
  }

  // The manager lives inside this object, so its address changes on move.
  static constexpr bool kIsStable = false;

  // Supports `MemoryEstimator`.
  template
  friend void RiegeliRegisterSubobjects(const DependencyBase* self,
                                        MemoryEstimator& memory_estimator) {
    memory_estimator.RegisterSubobjects(&self->manager_);
  }

 protected:
  DependencyBase(const DependencyBase& that) = default;
  DependencyBase& operator=(const DependencyBase& that) = default;

  DependencyBase(DependencyBase&& that) = default;
  DependencyBase& operator=(DependencyBase&& that) = default;

  ~DependencyBase() = default;

  // Mutable access even from const contexts; backed by the `mutable` member.
  Manager& mutable_manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return manager_;
  }

 private:
  mutable Manager manager_;
};

// Specialization of `DependencyBase` for lvalue references.
//
// Only a subset of operations is provided: the dependency must be initialized,
// and assignment is not supported.
// NOTE(review): template argument lists stripped by extraction; this is the
// lvalue-reference specialization — the manager is stored as `Manager&`.
template
class DependencyBase {
 public:
  explicit DependencyBase(Initializer manager) noexcept
      : manager_(std::move(manager)) {}

  Manager& manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return manager_; }

  // A reference member never moves the referent, so the handle is stable.
  static constexpr bool kIsStable = true;

 protected:
  DependencyBase(const DependencyBase& that) = default;
  DependencyBase& operator=(const DependencyBase&) = delete;

  ~DependencyBase() = default;

  Manager& mutable_manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return manager_;
  }

 private:
  Manager& manager_;
};

// Specialization of `DependencyBase` for rvalue references.
//
// Only a subset of operations is provided: the dependency must be initialized,
// and assignment is not supported.
template
class DependencyBase {
 public:
  explicit DependencyBase(Initializer manager) noexcept
      : manager_(std::move(manager)) {}

  Manager& manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return manager_; }

  static constexpr bool kIsStable = true;

 protected:
  DependencyBase(DependencyBase&& that) = default;
  DependencyBase& operator=(DependencyBase&&) = delete;

  ~DependencyBase() = default;

  Manager& mutable_manager() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return manager_;
  }

 private:
  Manager&& manager_;
};

}  // namespace riegeli

#endif  // RIEGELI_BASE_DEPENDENCY_BASE_H_

================================================
FILE: riegeli/base/dependency_manager.h
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef RIEGELI_BASE_DEPENDENCY_MANAGER_H_
#define RIEGELI_BASE_DEPENDENCY_MANAGER_H_

// NOTE(review): the system header names of the next five `#include`
// directives were stripped by text extraction — TODO confirm against the
// original source.
#include
#include
#include
#include
#include

#include "absl/base/attributes.h"
#include "absl/meta/type_traits.h"
#include "riegeli/base/dependency_base.h"

namespace riegeli {

// `DependencyManager` provides a preliminary interpretation of
// `Manager` as a pointer or pointer-like type, in the form of a protected
// member function `ptr()`. It is used by `DependencyImpl` specializations to
// infer `get()`, which often returns `*ptr()` or `ptr()`, depending on which
// of them is convertible to `Handle`.
//
// Examples (template arguments stripped by extraction):
// * `T* DependencyManager::ptr()`
// * `T* DependencyManager::ptr()`
// * `T* DependencyManager::ptr()`
// * `T* DependencyManager::ptr()`
// * `std::nullptr_t DependencyManager::ptr()`
// * `T* DependencyManager>::ptr()`
// * `T* DependencyManager>::ptr()`
// * `Handle DependencyManager>::ptr()`
//
// `DependencyManager` derives from
// `DependencyManagerImpl` (where `ManagerStorage` is
// `Manager`, `Manager&`, or `Manager&&`) which has specializations for various
// `Manager` types.
//
// `DependencyManagerImpl` specializations often derive
// from `DependencyBase` (or from `DependencyBase` if
// `Manager` is cheap to move).
//
// `DependencyManager` provides what `DependencyBase` provides (constructors,
// `Reset()`, `manager()`, and `kIsStable`), and also `ptr()`, `IsOwning()`,
// and `kIsOwning`.

// This template is specialized but does not have a primary definition.
template
class DependencyManagerImpl;

// Specialization of `DependencyManagerImpl`: an unowned
// dependency stored by pointer.
// NOTE(review): template argument lists stripped by extraction; tokens
// preserved unchanged.
// Unowned dependency stored by raw pointer: `ptr()` simply returns it.
template
class DependencyManagerImpl : public DependencyBase {
 public:
  using DependencyManagerImpl::DependencyBase::DependencyBase;

  static constexpr bool kIsOwning = false;
  // A pointer value survives moves of this object unchanged.
  static constexpr bool kIsStable = true;

 protected:
  DependencyManagerImpl(const DependencyManagerImpl& that) = default;
  DependencyManagerImpl& operator=(const DependencyManagerImpl& that) =
      default;

  DependencyManagerImpl(DependencyManagerImpl&& that) = default;
  DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default;

  ~DependencyManagerImpl() = default;

  T* ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->manager(); }
};

// Specialization of `DependencyManagerImpl`:
// an unowned dependency stored by pointer, always missing. This is useful for
// `Any` and `AnyRef`.
template
class DependencyManagerImpl : public DependencyBase {
 public:
  using DependencyManagerImpl::DependencyBase::DependencyBase;

  static constexpr bool kIsOwning = false;
  static constexpr bool kIsStable = true;

 protected:
  DependencyManagerImpl(const DependencyManagerImpl& that) = default;
  DependencyManagerImpl& operator=(const DependencyManagerImpl& that) =
      default;

  DependencyManagerImpl(DependencyManagerImpl&& that) = default;
  DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default;

  ~DependencyManagerImpl() = default;

  // There is never an object: the "pointer" is the literal `nullptr`.
  std::nullptr_t ptr() const { return nullptr; }
};

// Specialization of
// `DependencyManagerImpl, ManagerStorage>`:
// an owned dependency stored by `std::unique_ptr`.
// NOTE(review): template argument lists stripped by extraction; tokens
// preserved unchanged.
// Owned dependency stored by `std::unique_ptr`: owning while non-null.
template
class DependencyManagerImpl, ManagerStorage>
    : public DependencyBase<
          std::conditional_t, std::unique_ptr, ManagerStorage>> {
 public:
  using DependencyManagerImpl::DependencyBase::DependencyBase;

  bool IsOwning() const { return this->manager() != nullptr; }

  static constexpr bool kIsOwning = true;
  // The pointee address does not change when the `unique_ptr` is moved.
  static constexpr bool kIsStable = true;

 protected:
  DependencyManagerImpl(DependencyManagerImpl&& that) = default;
  DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default;

  ~DependencyManagerImpl() = default;

  T* ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return this->manager().get();
  }
};

// Specialization of
// `DependencyManagerImpl, ManagerStorage>`:
// an owned dependency stored by `std::optional`.
template
class DependencyManagerImpl, ManagerStorage>
    : public DependencyBase {
 public:
  using DependencyManagerImpl::DependencyBase::DependencyBase;

  bool IsOwning() const { return this->manager() != std::nullopt; }

  static constexpr bool kIsOwning = true;
  // Note: no `kIsStable` here — the value lives inside the `optional`.

 protected:
  DependencyManagerImpl(const DependencyManagerImpl& that) = default;
  DependencyManagerImpl& operator=(const DependencyManagerImpl& that) =
      default;

  DependencyManagerImpl(DependencyManagerImpl&& that) = default;
  DependencyManagerImpl& operator=(DependencyManagerImpl&& that) = default;

  ~DependencyManagerImpl() = default;

  // Returns the address of the contained value, or `nullptr` when absent.
  T* ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    if (this->mutable_manager() == std::nullopt) return nullptr;
    return &*this->mutable_manager();
  }
};

namespace dependency_manager_internal {

// `SupportsDependencyManagerImpl::value` is `true` when
// `DependencyManagerImpl` is defined.
// NOTE(review): template argument lists stripped by extraction; tokens
// preserved unchanged.
// Detection trait: defaults to `false`, flips to `true` when the
// corresponding `DependencyManagerImpl` specialization is usable.
template
struct SupportsDependencyManagerImpl : std::false_type {};

template
struct SupportsDependencyManagerImpl<
    Manager,
    std::void_t<
        decltype(std::declval&>()
                     .manager())>> : std::true_type {};

}  // namespace dependency_manager_internal

// `DependencyManager` extends
// `DependencyManagerImpl` with the basic case when
// `Manager` is an owned dependency stored by value, and with specializations
// when `Manager` is `T&` or `T&&`.
template
class DependencyManager;

// Specialization of `DependencyManager` when
// `DependencyManagerImpl` is defined: delegate to it.
template
class DependencyManager<
    Manager,
    std::enable_if_t>,
        dependency_manager_internal::SupportsDependencyManagerImpl>>>
    : public DependencyManagerImpl {
 public:
  using DependencyManager::DependencyManagerImpl::DependencyManagerImpl;

  // Sanity check on the delegated-to implementation's `manager()` signature.
  static_assert(
      std::is_convertible_v<
          decltype(std::declval<
                       typename DependencyManager::DependencyManagerImpl&>()
                       .manager()),
          Manager&>,
      "DependencyManagerImpl::manager() "
      "must return Manager&");

 protected:
  DependencyManager(const DependencyManager& that) = default;
  DependencyManager& operator=(const DependencyManager& that) = default;

  DependencyManager(DependencyManager&& that) = default;
  DependencyManager& operator=(DependencyManager&& that) = default;

  ~DependencyManager() = default;
};

// Specialization of `DependencyManager` when
// `DependencyManagerImpl` is not defined: an owned dependency stored
// by value.
// NOTE(review): template argument lists stripped by extraction; tokens
// preserved unchanged.
// Owned dependency stored by value in `DependencyBase`.
template
class DependencyManager<
    Manager,
    std::enable_if_t>,
        std::negation>>>>
    : public DependencyBase {
 public:
  using DependencyManager::DependencyBase::DependencyBase;

  static constexpr bool kIsOwning = true;

 protected:
  DependencyManager(const DependencyManager& that) = default;
  DependencyManager& operator=(const DependencyManager& that) = default;

  DependencyManager(DependencyManager&& that) = default;
  DependencyManager& operator=(DependencyManager&& that) = default;

  ~DependencyManager() = default;

  // Address of the stored value (mutable even from const contexts).
  Manager* ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return &this->mutable_manager();
  }
};

// Specialization of `DependencyManager` when
// `DependencyManagerImpl>` is defined:
// delegate to it, but store `absl::remove_cvref_t` by reference
// to avoid moving it.
//
// This handles cases where `Manager` is deduced from a function parameter
// as a reference type, but the type under the reference determines the
// interpretation, e.g. `T*&`.
template
class DependencyManager<
    Manager&,
    std::enable_if_t>::value>>
    : public DependencyManagerImpl,
                                   absl::remove_cvref_t&> {
 public:
  using DependencyManager::DependencyManagerImpl::DependencyManagerImpl;

  static_assert(
      std::is_convertible_v<
          decltype(std::declval<
                       typename DependencyManager::DependencyManagerImpl&>()
                       .manager()),
          Manager&>,
      "DependencyManagerImpl::manager() "
      "must return Manager&");

 protected:
  DependencyManager(const DependencyManager& that) = default;
  DependencyManager& operator=(const DependencyManager& that) = default;

  DependencyManager(DependencyManager&& that) = default;
  DependencyManager& operator=(DependencyManager&& that) = default;

  ~DependencyManager() = default;
};

// Specialization of `DependencyManager` when
// `DependencyManagerImpl>` is not defined:
// an unowned dependency stored by lvalue reference.
// NOTE(review): template argument lists stripped by extraction; tokens
// preserved unchanged.
// Unowned dependency stored by lvalue reference.
template
class DependencyManager<
    Manager&,
    std::enable_if_t<
        !dependency_manager_internal::SupportsDependencyManagerImpl<
            absl::remove_cvref_t>::value>>
    : public DependencyBase {
 public:
  using DependencyManager::DependencyBase::DependencyBase;

  static constexpr bool kIsOwning = false;

 protected:
  DependencyManager(const DependencyManager& that) = default;
  DependencyManager& operator=(const DependencyManager&) = delete;

  ~DependencyManager() = default;

  Manager* ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return &this->manager();
  }
};

// Specialization of `DependencyManager` when
// `DependencyManagerImpl>` is defined:
// delegate to it, but store `absl::remove_cvref_t` by reference
// to avoid moving it.
//
// This handles cases where `Manager` is deduced from a function parameter
// as a reference type, but the type under the reference determines the
// interpretation, e.g. `std::unique_ptr&&`.
template
class DependencyManager<
    Manager&&,
    std::enable_if_t>::value>>
    : public DependencyManagerImpl,
                                   absl::remove_cvref_t&&> {
 public:
  using DependencyManager::DependencyManagerImpl::DependencyManagerImpl;

  static_assert(
      std::is_convertible_v<
          decltype(std::declval<
                       typename DependencyManager::DependencyManagerImpl&>()
                       .manager()),
          Manager&>,
      "DependencyManagerImpl::manager() "
      "must return Manager&");

 protected:
  DependencyManager(const DependencyManager& that) = default;
  DependencyManager& operator=(const DependencyManager& that) = default;

  DependencyManager(DependencyManager&& that) = default;
  DependencyManager& operator=(DependencyManager&& that) = default;

  ~DependencyManager() = default;
};

// Specialization of `DependencyManager` when
// `DependencyManagerImpl>` is not defined: an
// owned dependency stored by rvalue reference.
// NOTE(review): template argument lists stripped by extraction; tokens
// preserved unchanged.
// Owned dependency stored by rvalue reference (the referent is assumed to
// stay alive for the dependency's lifetime).
template
class DependencyManager<
    Manager&&,
    std::enable_if_t<
        !dependency_manager_internal::SupportsDependencyManagerImpl<
            absl::remove_cvref_t>::value>>
    : public DependencyBase {
 public:
  using DependencyManager::DependencyBase::DependencyBase;

  static constexpr bool kIsOwning = true;

 protected:
  DependencyManager(DependencyManager&& that) = default;
  DependencyManager& operator=(DependencyManager&&) = delete;

  ~DependencyManager() = default;

  Manager* ptr() const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return &this->manager();
  }
};

namespace dependency_manager_internal {

// Expose protected `DependencyManager::ptr()` for `DependencyManagerPtr`.
template
struct DependencyManagerAccess : DependencyManager {
  using DependencyManagerAccess::DependencyManager::ptr;
};

// `DependencyManagerPtrImpl::type` is the type returned by
// `DependencyManager::ptr()`.
template
struct DependencyManagerPtrImpl {
  using type = decltype(std::declval&>().ptr());
};

// In `DependencyManagerPtrImpl` for `Manager` stored by value, avoid
// instantiating `DependencyManager` just to see what its `ptr()` would
// return. This could lead to subtle compile errors, causing the following chain
// of template instantiations:
//
// * `TargetRefSupportsDependency`
// * `SupportsDependencyInit`
// * `SupportsDependencyDeref`
// * `SupportsDependencyDefault`
// * `DependencyManagerPtr`
// * `DependencyManager`
// * `DependencyBase`
//
// which contains a member variable of an abstract type.
template
struct DependencyManagerPtrImpl<
    Manager,
    std::enable_if_t>,
        std::negation>>>> {
  using type = Manager*;
};

// `DependencyManagerRefImpl` turns a toplevel pointer into an lvalue
// reference; non-pointer types pass through unchanged.
template
struct DependencyManagerRefImpl {
  using type = Manager;
};

template
struct DependencyManagerRefImpl {
  using type = Manager&;
};

}  // namespace dependency_manager_internal

// `DependencyManagerPtr` is the type returned by
// `DependencyManager::ptr()`.
// NOTE(review): template parameter lists stripped by extraction; tokens
// preserved unchanged.
template
using DependencyManagerPtr =
    typename dependency_manager_internal::DependencyManagerPtrImpl<
        Manager>::type;

// `DependencyManagerRef` is `DependencyManagerPtr` with the
// toplevel pointer changed to lvalue reference, if any.
//
// This should normally be used under the condition that
// `std::is_pointer_v>`.
template
using DependencyManagerRef =
    typename dependency_manager_internal::DependencyManagerRefImpl<
        DependencyManagerPtr>::type;

}  // namespace riegeli

#endif  // RIEGELI_BASE_DEPENDENCY_MANAGER_H_

================================================
FILE: riegeli/base/errno_mapping.cc
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN  // Trim <windows.h> to essentials.
#endif

#include "riegeli/base/errno_mapping.h"

// NOTE(review): the system header names of the bare `#include` directives
// below were stripped by text extraction (presumably <windows.h>, a Winsock
// header, and <cerrno>) — TODO confirm against the original source.
#ifdef _WIN32
#include
#include
#endif

#include

#include "absl/status/status.h"
#ifdef _WIN32
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
#include "absl/types/span.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/unicode.h"
#endif

namespace riegeli {

#ifdef _WIN32

namespace {

// Maps a Windows `GetLastError()`-style error code to the closest
// `absl::StatusCode`; unrecognized codes map to `kUnknown`.
absl::StatusCode WindowsErrorToStatusCode(DWORD error_number) {
  switch (error_number) {
    case ERROR_SUCCESS:
      return absl::StatusCode::kOk;
    case ERROR_OPERATION_ABORTED:
      return absl::StatusCode::kCancelled;
    case ERROR_INVALID_HANDLE:
    case ERROR_INVALID_PARAMETER:
    case ERROR_BUFFER_OVERFLOW:
    case ERROR_INVALID_NAME:
    case ERROR_NEGATIVE_SEEK:
    case ERROR_DIRECTORY:
    case ERROR_REPARSE_TAG_INVALID:
    case WSAEFAULT:
    case WSAEINVAL:
    case WSAENAMETOOLONG:
      return absl::StatusCode::kInvalidArgument;
    case ERROR_FILE_NOT_FOUND:
    case ERROR_PATH_NOT_FOUND:
    case ERROR_INVALID_DRIVE:
    case ERROR_BAD_UNIT:
    case ERROR_BAD_NETPATH:
    case ERROR_DEV_NOT_EXIST:
    case ERROR_BAD_PATHNAME:
      return absl::StatusCode::kNotFound;
    case ERROR_FILE_EXISTS:
    case ERROR_ALREADY_EXISTS:
      return absl::StatusCode::kAlreadyExists;
    case ERROR_ACCESS_DENIED:
    case ERROR_INVALID_ACCESS:
    case ERROR_CURRENT_DIRECTORY:
    case ERROR_WRITE_PROTECT:
    case ERROR_SHARING_VIOLATION:
    case ERROR_CANNOT_MAKE:
    case ERROR_NOACCESS:
    case WSAEACCES:
      return absl::StatusCode::kPermissionDenied;
    case ERROR_TOO_MANY_OPEN_FILES:
    case ERROR_NOT_ENOUGH_MEMORY:
    case ERROR_OUTOFMEMORY:
    case ERROR_HANDLE_DISK_FULL:
    case ERROR_DISK_FULL:
    case WSAEMFILE:
      return absl::StatusCode::kResourceExhausted;
    case ERROR_BROKEN_PIPE:
    case ERROR_BUSY_DRIVE:
    case ERROR_DIR_NOT_EMPTY:
    case ERROR_BUSY:
    case ERROR_OPEN_FILES:
    case ERROR_DEVICE_IN_USE:
    case WSAEBADF:
      return absl::StatusCode::kFailedPrecondition;
    case ERROR_HANDLE_EOF:
      return absl::StatusCode::kOutOfRange;
    case ERROR_INVALID_FUNCTION:
    case ERROR_NOT_SUPPORTED:
      return absl::StatusCode::kUnimplemented;
    case ERROR_NOT_READY:
    case ERROR_LOCK_VIOLATION:
    case ERROR_LOCKED:
    case ERROR_RETRY:
    case WSAEINTR:
      return absl::StatusCode::kUnavailable;
    default:
      return absl::StatusCode::kUnknown;
  }
}

}  // namespace

#endif  // _WIN32

// Maps an `absl::StatusCode` to an approximate `errno` value. Several
// distinct codes collapse to the same `errno`, so the mapping does not
// round-trip through `absl::ErrnoToStatusCode()` in all cases.
int StatusCodeToErrno(absl::StatusCode status_code) {
  switch (status_code) {
    case absl::StatusCode::kOk:
      return 0;
    case absl::StatusCode::kCancelled:
      return ECANCELED;
    case absl::StatusCode::kUnknown:
      return EIO;
    case absl::StatusCode::kInvalidArgument:
      return EINVAL;
    case absl::StatusCode::kDeadlineExceeded:
      return ETIMEDOUT;
    case absl::StatusCode::kNotFound:
      return ENOENT;
    case absl::StatusCode::kAlreadyExists:
      return EEXIST;
    case absl::StatusCode::kPermissionDenied:
      return EACCES;
    case absl::StatusCode::kResourceExhausted:
      return ENOSPC;
    case absl::StatusCode::kFailedPrecondition:
      // Does not round trip:
      // `absl::ErrnoToStatusCode(EINVAL) == absl::StatusCode::kInvalidArgument`
      return EINVAL;
    case absl::StatusCode::kAborted:
      return EDEADLK;
    case absl::StatusCode::kOutOfRange:
      return ERANGE;
    case absl::StatusCode::kUnimplemented:
      return ENOTSUP;
    case absl::StatusCode::kInternal:
      // Does not round trip:
      // `absl::ErrnoToStatusCode(EIO) == absl::StatusCode::kUnknown`
      return EIO;
    case absl::StatusCode::kUnavailable:
      return EAGAIN;
    case absl::StatusCode::kDataLoss:
      // Does not round trip:
      // `absl::ErrnoToStatusCode(EIO) == absl::StatusCode::kUnknown`
      return EIO;
    case absl::StatusCode::kUnauthenticated:
      // Does not round trip:
      // `absl::ErrnoToStatusCode(EACCES) ==
      // absl::StatusCode::kPermissionDenied`
      return EACCES;
    default:
      // Does not round trip:
      // `absl::ErrnoToStatusCode(EIO) == absl::StatusCode::kUnknown`
      return EIO;
  }
}

#ifdef _WIN32

// Builds an `absl::Status` from a Windows error code, appending the
// system-provided error text (converted from UTF-16) to `message`.
// NOTE(review): the `IntCast` and `reinterpret_cast` template arguments were
// stripped by text extraction; tokens preserved unchanged.
absl::Status WindowsErrorToStatus(uint32_t error_number,
                                  absl::string_view message) {
  LPWSTR os_message;
  // FORMAT_MESSAGE_ALLOCATE_BUFFER makes the system allocate `os_message`;
  // it is released below with `LocalFree()`.
  const DWORD length = FormatMessageW(
      FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
          FORMAT_MESSAGE_IGNORE_INSERTS,
      nullptr, IntCast(error_number), 0,
      reinterpret_cast(&os_message), 0, nullptr);
  const absl::Status status(
      WindowsErrorToStatusCode(IntCast(error_number)),
      absl::StrCat(message, ": ",
                   WideToUtf8Lossy(absl::MakeConstSpan(
                       os_message, IntCast(length)))));
  LocalFree(os_message);
  return status;
}

#endif  // _WIN32

}  // namespace riegeli

================================================
FILE: riegeli/base/errno_mapping.h
================================================
// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef RIEGELI_BASE_ERRNO_MAPPING_H_
#define RIEGELI_BASE_ERRNO_MAPPING_H_

// NOTE(review): the bare `#include` below lost its header name to text
// extraction — TODO confirm (likely <cstdint> for `uint32_t`).
#ifdef _WIN32
#include
#endif

#include "absl/status/status.h"
#ifdef _WIN32
#include "absl/strings/string_view.h"
#endif

namespace riegeli {

// Converts `absl::StatusCode` to `errno` value.
int StatusCodeToErrno(absl::StatusCode status_code);

#ifdef _WIN32
// Converts a Windows error code plus context `message` to `absl::Status`.
absl::Status WindowsErrorToStatus(uint32_t error_number,
                                  absl::string_view message);
#endif  // _WIN32

}  // namespace riegeli

#endif  // RIEGELI_BASE_ERRNO_MAPPING_H_

================================================
FILE: riegeli/base/estimated_allocated_size.h
================================================
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_ #define RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_ #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/arithmetic.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // Returns the estimated size which will be allocated when requesting to // allocate `requested_size`. inline size_t EstimatedAllocatedSize(size_t requested_size) { // Placeholder for asking the memory manager, which might be possible on some // platforms. return RoundUp<2 * sizeof(void*)>( UnsignedMax(requested_size, 4 * sizeof(void*))); } // Returns the estimated size which was allocated at `ptr` when requested to // allocate `requested_size`. inline size_t EstimatedAllocatedSize(ABSL_ATTRIBUTE_UNUSED const void* ptr, size_t requested_size) { // Placeholder for using `ptr`, which might be possible on some platforms. return EstimatedAllocatedSize(requested_size); } // A deterministic variant of `EstimatedAllocatedSize()`, useful for testing. inline size_t EstimatedAllocatedSizeForTesting(size_t requested_size) { return 16 + requested_size; } } // namespace riegeli #endif // RIEGELI_BASE_ESTIMATED_ALLOCATED_SIZE_H_ ================================================ FILE: riegeli/base/external_data.cc ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "riegeli/base/external_data.h" #include #include "absl/strings/string_view.h" namespace riegeli { ExternalData ExternalDataCopy(absl::string_view data) { char* storage = nullptr; if (!data.empty()) { storage = static_cast(operator new(data.size())); std::memcpy(storage, data.data(), data.size()); } return ExternalData{ExternalStorage(storage, operator delete), absl::string_view(storage, data.size())}; } } // namespace riegeli ================================================ FILE: riegeli/base/external_data.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_EXTERNAL_DATA_H_ #define RIEGELI_BASE_EXTERNAL_DATA_H_ #include #include #include "absl/strings/string_view.h" namespace riegeli { // Type-erased external object with its deleter. // // `ExternalStorage` can be decomposed with `void* ExternalStorage::release()` // and `ExternalStorage::get_deleter() -> void (*)(void*)`. using ExternalStorage = std::unique_ptr; // Supports `ExternalRef`. 
// Transfers ownership out of `*self`, leaving it empty.
inline ExternalStorage RiegeliToExternalStorage(ExternalStorage* self) {
  return std::move(*self);
}

// Type-erased external object with its deleter and a substring of a byte array
// it owns.
struct ExternalData {
  // Views the owned substring; does not transfer ownership.
  /*implicit*/ operator absl::string_view() const { return substr; }

  // Indicates support for:
  //  * `ExternalRef(ExternalData&&)`
  //  * `ExternalRef(ExternalData&&, substr)`
  friend void RiegeliSupportsExternalRef(ExternalData*) {}

  // Supports `ExternalRef`: releases the storage, type-erased.
  friend ExternalStorage RiegeliToExternalStorage(ExternalData* self) {
    return std::move(self->storage);
  }

  ExternalStorage storage;  // Must outlive usages of `substr`.
  absl::string_view substr;
};

// Creates `ExternalData` holding a copy of `data`.
ExternalData ExternalDataCopy(absl::string_view data);

}  // namespace riegeli

#endif  // RIEGELI_BASE_EXTERNAL_DATA_H_

================================================
FILE: riegeli/base/external_ref.h
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef RIEGELI_BASE_EXTERNAL_REF_H_
#define RIEGELI_BASE_EXTERNAL_REF_H_

// Aggregation header: pulls in the `ExternalRef` implementation pieces.
#include "riegeli/base/chain_base.h"           // IWYU pragma: keep
#include "riegeli/base/chain_details.h"        // IWYU pragma: keep
#include "riegeli/base/external_ref_base.h"    // IWYU pragma: export
#include "riegeli/base/external_ref_support.h" // IWYU pragma: export

#endif  // RIEGELI_BASE_EXTERNAL_REF_H_

================================================
FILE: riegeli/base/external_ref_base.h
================================================
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef RIEGELI_BASE_EXTERNAL_REF_BASE_H_
#define RIEGELI_BASE_EXTERNAL_REF_BASE_H_

// IWYU pragma: private, include "riegeli/base/external_ref.h"

// NOTE(review): the system header names of the bare `#include` directives
// below were stripped by text extraction — TODO confirm against the original
// source.
#include
#include
#include
#include  // IWYU pragma: keep
#include
#include

#include "absl/base/attributes.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/cord.h"
#include "absl/strings/string_view.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/bytes_ref.h"
#include "riegeli/base/chain_base.h"
#include "riegeli/base/cord_utils.h"
#include "riegeli/base/external_data.h"
#include "riegeli/base/external_ref_support.h"
#include "riegeli/base/initializer.h"
#include "riegeli/base/temporary_storage.h"

namespace riegeli {

// `ExternalRef` specifies a byte array in a way which allows sharing it with
// other objects without copying if that is considered more efficient than
// copying.
It mediates between the producer and the consumer of the data during // transfer; it is not suitable for longer storage. Creating an `ExternalRef` is // usually more efficient than creating a `Chain` or `absl::Cord` if the data // will ultimately be copied rather than shared. // // `ExternalRef` is constructed from an object of some type `T` which owns the // data, or from `Initializer`. The latter allows to skip constructing the // object if the data are known beforehand, will ultimately be copied, and the // constructed object is not needed otherwise. // // `ExternalRef` can be converted to `absl::string_view`, `Chain`, `absl::Cord`, // or `ExternalData`, or assigned, appended, or prepended to a `Chain` or // `absl::Cord`. Apart from conversion to `absl::string_view` it can be consumed // at most once. // // In contrast to `Chain::Block` and `absl::MakeCordFromExternal()`, // `ExternalRef` chooses between sharing the object and copying the data, // depending on the size of the data, the method of consuming the data, // and the state of the destination for appending or prepending. // // `ExternalRef` itself does not own the object description nor the data, and is // efficiently movable. The state is stored in a storage object passed as a // default argument to the original `ExternalRef` constructor. // // The expected interface of the object which owns the data is a superset of the // interfaces expected by `Chain::Block` and `absl::MakeCordFromExternal()`. // // `ExternalRef` constructors require the external object type to indicate // that it supports `ExternalRef` by providing one of the following functions // (only their presence is checked, they are never called): // ``` // // Indicates support for `ExternalRef(T&&, substr)`. // // // // `substr` must be owned by the object if it gets created or moved, unless // // `RiegeliExternalCopy()` (see below) recognizes cases when it is not. 
// // // // If `T` is convertible to `BytesRef`, then also indicates support for // // `ExternalRef(T&&)`. // // // // The parameter can also have type `const T*`. This also indicates // // support for `ExternalRef(const T&, substr)` and possibly // // `ExternalRef(const T&)`, i.e. that `T` is copyable and copying it is // // more efficient than copying the data. // // // // If the `ExternalRef` is later converted to `absl::Cord` and // // `absl::MakeCordFromExternal()` gets used, then this avoids an allocation // // by taking advantage of the promise that `substr` will be owned also by // // the moved object (`absl::MakeCordFromExternal()` requires knowing the // // data before specifying the object to be moved). // friend void RiegeliSupportsExternalRef(T*) {} // // // Indicates support for `ExternalRef(T&&)`, as long as `T` is convertible // // to `BytesRef`. // // // // The parameter can also have type `const T*`. This also indicates support // // for `ExternalRef(const T&)`, i.e. that `T` is copyable and copying it is // // more efficient than copying the data. // friend void RiegeliSupportsExternalRefWhole(T*) {} // ``` // // `ExternalRef::From()` are like `ExternalRef` constructors, but // `RiegeliSupportsExternalRef()` or `RiegeliSupportsExternalRefWhole()` is not // needed. The caller is responsible for using an appropriate type of the // external object. // // `T` may also support the following member functions, either with or without // the `substr` parameter, with the following definitions assumed by default: // ``` // // Called once before the destructor, except on a moved-from object. // // If only this function is needed, `T` can be a lambda. // // // // If this is present, the object will be created unconditionally, because // // calling this might be needed to delete resources which already exist. // // // // This can also be a const method. 
If this is not a const method and the // // object is passed by const or lvalue reference, this will be called on a // // mutable copy of the object. // void operator()(absl::string_view substr) && {} // // // If this returns `true`, the data will be copied instead of wrapping the // // object. The data does not need to be stable while the object is moved. // // `RiegeliToChainBlock()`, `RiegeliToCord()`, `RiegeliToExternalData()`, // // `RiegeliToExternalStorage()`, and `RiegeliExternalDelegate()` will not // // be called. // // // // Typically this indicates an object with short data stored inline. // friend bool RiegeliExternalCopy(const T* self) { return false; } // // // Converts `*self` or its `substr` to `Chain::Block`, if this can be done // // more efficiently than with `Chain::Block` constructor. Can modify // // `*self`. `operator()` will no longer be called. // // // // The `self` parameter can also have type `const T*`. If it has type `T*` // // and the object is passed by const or lvalue reference, this will be // // called on a mutable copy of the object. // // // // If the `substr` parameter was given to `ExternalRef` constructor, the // // `substr` parameter is required here, otherwise it is optional. // friend Chain::Block RiegeliToChainBlock(T* self, absl::string_view substr); // // // Converts `*self` or its `substr` to `absl::Cord`, if this can be done // // more efficiently than with `absl::MakeCordFromExternal()`. Can modify // // `*self`. `operator()` will no longer be called. // // // // The `self` parameter can also have type `const T*`. If it has type `T*` // // and the object is passed by const or lvalue reference, this will be // // called on a mutable copy of the object. // // // // If the `substr` parameter was given to `ExternalRef` constructor, the // // `substr` parameter is required here, otherwise it is optional.
// friend absl::Cord RiegeliToCord(T* self, absl::string_view substr); // // // Converts `*self` to `ExternalData`, if this can be done more efficiently // // than allocating the object on the heap, e.g. if the object fits in a // // pointer. Can modify `*self`. `operator()` will no longer be called. // // // // The `self` parameter can also have type `const T*`. If it has type `T*` // // and the object is passed by const or lvalue reference, this will be // // called on a mutable copy of the object. // // // // If the `substr` parameter was given to `ExternalRef` constructor, the // // `substr` parameter is required here, otherwise it is optional. // friend ExternalData RiegeliToExternalData(T* self, // absl::string_view substr); // // // This can be defined instead of `RiegeliToExternalData()` with `substr`, // // which would return `ExternalData` with the same `substr`. // friend ExternalStorage RiegeliToExternalStorage(T* self); // // // If defined, indicates a subobject to wrap instead of the whole object. // // It must call `std::forward<Callback>(delegate_to)(subobject)` or // // `std::forward<Callback>(delegate_to)(subobject, substr)`, preferably // // with `std::move(subobject)`. `delegate_to` must be called exactly once. // // // // Typically this indicates a smaller object which is sufficient to keep // // the data alive, or the active variant if the object stores one of // // multiple subobjects. // // // // `RiegeliToChainBlock()`, `RiegeliToCord()`, `RiegeliToExternalData()`, // // and `RiegeliToExternalStorage()`, if defined, are used in preference to // // this. // // // // The subobject will be processed like by `ExternalRef::From()`, including // // the possibility of further delegation, except that `Initializer` is not // // supported. The subobject must not be `*self`. // // // // The `self` parameter can also have type `const T*`.
If it has type `T*` // // and the object is passed by const or lvalue reference, this will be // // called on a mutable copy of the object. // // // // The `substr` parameter is optional here. If absent here while the // // `substr` parameter was given to `ExternalRef` constructor, then it is // // propagated. If absent here while the `substr` parameter was not given // // to `ExternalRef` constructor, then `subobject` must be convertible to // // `BytesRef`. If present here, it can be used to specify the data if // // `subobject` is not convertible to `BytesRef`. // template <typename Callback> // friend void RiegeliExternalDelegate(T* self, absl::string_view substr, // Callback&& delegate_to); // // // Shows internal structure in a human-readable way, for debugging. // // // // Used for conversion to `Chain`. // friend void RiegeliDumpStructure(const T* self, absl::string_view substr, // std::ostream& dest) { // dest << "[external] { }"; // } // // // Registers this object with `MemoryEstimator`. // // // // By default calls `memory_estimator.RegisterUnknownType()` and // // as an approximation of memory usage of an unknown type, registers just // // the stored `substr` if unique. // // // // Used for conversion to `Chain`. // friend void RiegeliRegisterSubobjects( // const T* self, riegeli::MemoryEstimator& memory_estimator); // ``` // // The `substr` parameter of these member functions, if present, will get the // `substr` parameter passed to `ExternalRef` constructor. Having `substr` // available in these functions might avoid storing `substr` in the external // object.
class ExternalRef { private: using UseStringViewFunction = void (*)(void* context, absl::string_view data); using UseChainBlockFunction = void (*)(void* context, Chain::Block data); using UseCordFunction = void (*)(void* context, absl::Cord data); using UseExternalDataFunction = void (*)(void* context, ExternalData data); template struct HasCallOperatorSubstr : std::false_type {}; template struct HasCallOperatorSubstr()( std::declval()))>> : std::true_type {}; template struct HasCallOperatorWhole : std::false_type {}; template struct HasCallOperatorWhole()())>> : std::true_type {}; template struct HasCallOperator : std::disjunction, HasCallOperatorWhole> {}; template ::value, int> = 0> static void CallOperatorWhole(T&& object) { const absl::string_view data = BytesRef(object); std::forward(object)(data); } template >, HasCallOperatorWhole>, int> = 0> static void CallOperatorWhole(T&& object) { std::forward(object)(); } template < typename T, std::enable_if_t< std::conjunction_v>, HasCallOperatorSubstr>>, int> = 0> static void CallOperatorWhole(T&& object) { absl::remove_cvref_t copy(object); const absl::string_view data = BytesRef(copy); std::move(copy)(data); } template < typename T, std::enable_if_t< std::conjunction_v< std::negation>, std::negation>>, HasCallOperatorWhole>>, int> = 0> static void CallOperatorWhole(T&& object) { (absl::remove_cvref_t(object))(); } template >::value, int> = 0> static void CallOperatorWhole(ABSL_ATTRIBUTE_UNUSED T&& object) {} template ::value, int> = 0> static void CallOperatorSubstr(T&& object, absl::string_view substr) { std::forward(object)(substr); } template >, HasCallOperatorWhole>, int> = 0> static void CallOperatorSubstr( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) { std::forward(object)(); } template < typename T, std::enable_if_t< std::conjunction_v>, HasCallOperatorSubstr>>, int> = 0> static void CallOperatorSubstr( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) { absl::remove_cvref_t 
copy(object); const absl::string_view data = BytesRef(copy); std::move(copy)(data); } template < typename T, std::enable_if_t< std::conjunction_v< std::negation>, std::negation>>, HasCallOperatorWhole>>, int> = 0> static void CallOperatorSubstr( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) { (absl::remove_cvref_t(object))(); } template >::value, int> = 0> static void CallOperatorSubstr( ABSL_ATTRIBUTE_UNUSED T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) {} template static external_ref_internal::PointerTypeT Pointer(T&& object) { return &object; } #if RIEGELI_DEBUG template , int> = 0> static void AssertSubstr(const T& object, absl::string_view substr) { if (!substr.empty()) { const BytesRef whole = object; RIEGELI_ASSERT(std::greater_equal<>()(substr.data(), whole.data())) << "Failed precondition of ExternalRef: " "substring not contained in whole data"; RIEGELI_ASSERT(std::less_equal<>()(substr.data() + substr.size(), whole.data() + whole.size())) << "Failed precondition of ExternalRef: " "substring not contained in whole data"; } } template < typename T, std::enable_if_t, int> = 0> #else template #endif static void AssertSubstr(ABSL_ATTRIBUTE_UNUSED const T& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr) { } template struct HasRiegeliExternalDelegateWhole : std::false_type {}; template struct HasRiegeliExternalDelegateWhole< T, Callback, std::void_t>(), std::declval()))>> : std::true_type {}; template struct HasRiegeliExternalDelegateSubstr : std::false_type {}; template struct HasRiegeliExternalDelegateSubstr< T, Callback, std::void_t>(), std::declval(), std::declval()))>> : std::true_type {}; template struct HasExternalDelegateWhole : std::disjunction, HasRiegeliExternalDelegateSubstr> {}; template ::value, int> = 0> static void ExternalDelegateWhole(T&& object, Callback&& delegate_to) { RiegeliExternalDelegate(ExternalRef::Pointer(std::forward(object)), std::forward(delegate_to)); } template >, 
HasRiegeliExternalDelegateSubstr>, int> = 0> static void ExternalDelegateWhole(T&& object, Callback&& delegate_to) { const absl::string_view data = BytesRef(object); RiegeliExternalDelegate(ExternalRef::Pointer(std::forward(object)), data, std::forward(delegate_to)); } template ::value, int> = 0> static void ExternalDelegateWhole( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data, Callback&& delegate_to) { RiegeliExternalDelegate(ExternalRef::Pointer(std::forward(object)), std::forward(delegate_to)); } template >, HasRiegeliExternalDelegateSubstr>, int> = 0> static void ExternalDelegateWhole(T&& object, absl::string_view data, Callback&& delegate_to) { RiegeliExternalDelegate(ExternalRef::Pointer(std::forward(object)), data, std::forward(delegate_to)); } template struct HasExternalDelegateSubstr : std::disjunction, HasRiegeliExternalDelegateWhole> {}; template ::value, int> = 0> static void ExternalDelegateSubstr(T&& object, absl::string_view substr, Callback&& delegate_to) { RiegeliExternalDelegate(ExternalRef::Pointer(std::forward(object)), substr, std::forward(delegate_to)); } template < typename T, typename Callback, std::enable_if_t< std::conjunction_v< std::negation>, HasRiegeliExternalDelegateWhole>, int> = 0> static void ExternalDelegateSubstr( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view substr, Callback&& delegate_to) { RiegeliExternalDelegate(ExternalRef::Pointer(std::forward(object)), std::forward(delegate_to)); } template struct HasRiegeliToChainBlockWhole : std::false_type {}; template struct HasRiegeliToChainBlockWhole< T, std::enable_if_t>())), Chain::Block>>> : std::true_type {}; template struct HasRiegeliToChainBlockSubstr : std::false_type {}; template struct HasRiegeliToChainBlockSubstr< T, std::enable_if_t>(), std::declval())), Chain::Block>>> : std::true_type {}; template struct HasToChainBlockWhole : std::disjunction, HasRiegeliToChainBlockSubstr> {}; template ::value, int> = 0> static Chain::Block ToChainBlockWhole(T&& object) 
{ return RiegeliToChainBlock(ExternalRef::Pointer(std::forward(object))); } template >, HasRiegeliToChainBlockSubstr>, int> = 0> static Chain::Block ToChainBlockWhole(T&& object) { const absl::string_view data = BytesRef(object); return RiegeliToChainBlock(ExternalRef::Pointer(std::forward(object)), data); } template ::value, int> = 0> static Chain::Block ToChainBlockWhole( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) { return RiegeliToChainBlock(ExternalRef::Pointer(std::forward(object))); } template >, HasRiegeliToChainBlockSubstr>, int> = 0> static Chain::Block ToChainBlockWhole(T&& object, absl::string_view data) { return RiegeliToChainBlock(ExternalRef::Pointer(std::forward(object)), data); } template using HasToChainBlockSubstr = HasRiegeliToChainBlockSubstr; template ::value, int> = 0> static Chain::Block ToChainBlockSubstr(T&& object, absl::string_view substr) { return RiegeliToChainBlock(ExternalRef::Pointer(std::forward(object)), substr); } template class ConverterToChainBlockWhole { public: ConverterToChainBlockWhole(const ConverterToChainBlockWhole&) = delete; ConverterToChainBlockWhole& operator=(const ConverterToChainBlockWhole&) = delete; template < typename SubT, std::enable_if_t, int> = 0> void operator()(SubT&& subobject) && { // The constructor processes the subobject. const absl::string_view data = BytesRef(subobject); ConverterToChainBlockWhole converter( std::forward(subobject), data, context_, use_string_view_, use_chain_block_); } template void operator()(SubT&& subobject, absl::string_view substr) && { // The constructor processes the subobject. 
ConverterToChainBlockSubstr converter( std::forward(subobject), substr, context_, use_string_view_, use_chain_block_); } private: friend class ExternalRef; ABSL_ATTRIBUTE_ALWAYS_INLINE explicit ConverterToChainBlockWhole(T&& object, absl::string_view data, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) : context_(context), use_string_view_(use_string_view), use_chain_block_(use_chain_block) { if (RiegeliExternalCopy(&object)) { use_string_view_(context_, data); ExternalRef::CallOperatorWhole(std::forward(object)); return; } std::move(*this).Callback(std::forward(object), data); } template < typename DependentT = T, std::enable_if_t::value, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { use_chain_block_(context_, ExternalRef::ToChainBlockWhole( std::forward(object), data)); } template >, HasToChainBlockWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { use_chain_block_(context_, ExternalRef::ToChainBlockWhole( absl::remove_cvref_t(object))); } template >>, HasExternalDelegateWhole< DependentT, ConverterToChainBlockWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { ExternalRef::ExternalDelegateWhole(std::forward(object), data, std::move(*this)); } template >>, std::negation>, HasExternalDelegateWhole, ConverterToChainBlockWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { ExternalRef::ExternalDelegateWhole(absl::remove_cvref_t(object), std::move(*this)); } template >>, std::negation, ConverterToChainBlockWhole>>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { use_chain_block_(context_, Chain::Block(std::forward(object))); } void* context_; UseStringViewFunction use_string_view_; UseChainBlockFunction 
use_chain_block_; }; template class ConverterToChainBlockSubstr { public: ConverterToChainBlockSubstr(const ConverterToChainBlockSubstr&) = delete; ConverterToChainBlockSubstr& operator=(const ConverterToChainBlockSubstr&) = delete; template void operator()(SubT&& subobject) && { std::move (*this)(std::forward(subobject), substr_); } template void operator()(SubT&& subobject, absl::string_view substr) && { RIEGELI_ASSERT_EQ(substr_.size(), substr.size()) << "ExternalRef: size mismatch"; // The constructor processes the subobject. ConverterToChainBlockSubstr converter( std::forward(subobject), substr, context_, use_string_view_, use_chain_block_); } private: friend class ExternalRef; ABSL_ATTRIBUTE_ALWAYS_INLINE explicit ConverterToChainBlockSubstr(T&& object, absl::string_view substr, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) : substr_(substr), context_(context), use_string_view_(use_string_view), use_chain_block_(use_chain_block) { AssertSubstr(object, substr_); if (RiegeliExternalCopy(&object)) { use_string_view_(context_, substr_); ExternalRef::CallOperatorSubstr(std::forward(object), substr_); return; } std::move(*this).Callback(std::forward(object)); } template < typename DependentT = T, std::enable_if_t::value, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_chain_block_(context_, ExternalRef::ToChainBlockSubstr( std::forward(object), substr_)); } template >, HasToChainBlockSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_chain_block_(context_, ExternalRef::ToChainBlockSubstr( absl::remove_cvref_t(object), substr_)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v>>, HasExternalDelegateSubstr< DependentT, ConverterToChainBlockSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { ExternalRef::ExternalDelegateSubstr(std::forward(object), substr_, std::move(*this)); } template < typename DependentT = 
T, std::enable_if_t< std::conjunction_v< std::negation< HasToChainBlockSubstr>>, std::negation>, HasExternalDelegateSubstr, ConverterToChainBlockSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { ExternalRef::ExternalDelegateSubstr(absl::remove_cvref_t(object), substr_, std::move(*this)); } template >>, std::negation, ConverterToChainBlockSubstr>>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_chain_block_(context_, Chain::Block(std::forward(object), substr_)); } absl::string_view substr_; void* context_; UseStringViewFunction use_string_view_; UseChainBlockFunction use_chain_block_; }; template class ObjectForCordWhole { public: explicit ObjectForCordWhole(Initializer object) : ptr_(std::move(object)) {} ObjectForCordWhole(ObjectForCordWhole&& that) = default; ObjectForCordWhole& operator=(ObjectForCordWhole&& that) = default; void operator()(absl::string_view substr) && { ExternalRef::CallOperatorSubstr(std::move(*ptr_), substr); } T& operator*() { return *ptr_; } const T& operator*() const { return *ptr_; } private: // Wrapped in `std::unique_ptr` so that the data are stable. // `absl::MakeCordFromExternal()` requires the data to be known beforehand // and valid for the moved external object. 
std::unique_ptr ptr_; }; template class ObjectForCordSubstr { public: explicit ObjectForCordSubstr(Initializer object, absl::string_view substr) : object_(std::move(object)) { AssertSubstr(**this, substr); } ObjectForCordSubstr(ObjectForCordSubstr&& that) = default; ObjectForCordSubstr& operator=(ObjectForCordSubstr&& that) = default; void operator()(absl::string_view substr) && { ExternalRef::CallOperatorSubstr(std::move(object_), substr); } T& operator*() { return object_; } const T& operator*() const { return object_; } private: ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS T object_; }; template struct HasRiegeliToCordWhole : std::false_type {}; template struct HasRiegeliToCordWhole< T, std::enable_if_t>())), absl::Cord>>> : std::true_type {}; template struct HasRiegeliToCordSubstr : std::false_type {}; template struct HasRiegeliToCordSubstr< T, std::enable_if_t>(), std::declval())), absl::Cord>>> : std::true_type {}; template struct HasToCordWhole : std::disjunction, HasRiegeliToCordSubstr> { }; template ::value, int> = 0> static absl::Cord ToCordWhole(T&& object) { return RiegeliToCord(ExternalRef::Pointer(std::forward(object))); } template >, HasRiegeliToCordSubstr>, int> = 0> static absl::Cord ToCordWhole(T&& object) { const absl::string_view data = BytesRef(object); return RiegeliToCord(ExternalRef::Pointer(std::forward(object)), data); } template ::value, int> = 0> static absl::Cord ToCordWhole(T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) { return RiegeliToCord(ExternalRef::Pointer(std::forward(object))); } template >, HasRiegeliToCordSubstr>, int> = 0> static absl::Cord ToCordWhole(T&& object, absl::string_view data) { return RiegeliToCord(ExternalRef::Pointer(std::forward(object)), data); } template using HasToCordSubstr = HasRiegeliToCordSubstr; template ::value, int> = 0> static absl::Cord ToCordSubstr(T&& object, absl::string_view substr) { return RiegeliToCord(ExternalRef::Pointer(std::forward(object)), substr); } template class ConverterToCordWhole 
{ public: ConverterToCordWhole(const ConverterToCordWhole&) = delete; ConverterToCordWhole& operator=(const ConverterToCordWhole&) = delete; template < typename SubT, std::enable_if_t, int> = 0> void operator()(SubT&& subobject) && { // The constructor processes the subobject. const absl::string_view data = BytesRef(subobject); ConverterToCordWhole converter(std::forward(subobject), data, context_, use_string_view_, use_cord_); } template void operator()(SubT&& subobject, absl::string_view substr) && { // The constructor processes the subobject. ConverterToCordSubstr converter(std::forward(subobject), substr, context_, use_string_view_, use_cord_); } private: friend class ExternalRef; ABSL_ATTRIBUTE_ALWAYS_INLINE explicit ConverterToCordWhole(T&& object, absl::string_view data, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) : context_(context), use_string_view_(use_string_view), use_cord_(use_cord) { if (RiegeliExternalCopy(&object)) { use_string_view_(context_, data); ExternalRef::CallOperatorWhole(std::forward(object)); return; } std::move(*this).Callback(std::forward(object), data); } template ::value, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { use_cord_(context_, ExternalRef::ToCordWhole(std::forward(object), data)); } template < typename DependentT = T, std::enable_if_t>, HasToCordWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { use_cord_(context_, ExternalRef::ToCordWhole(absl::remove_cvref_t( std::forward(object)))); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation>>, HasExternalDelegateWhole>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { ExternalRef::ExternalDelegateWhole(std::forward(object), data, std::move(*this)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation>>, 
std::negation< HasExternalDelegateWhole>, HasExternalDelegateWhole, ConverterToCordWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { ExternalRef::ExternalDelegateWhole(absl::remove_cvref_t(object), std::move(*this)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation>>, std::negation, ConverterToCordWhole>>, SupportsExternalRefSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { // If the type indicates that substrings are stable, then // `ObjectForCordSubstr` can be used instead of `ObjectForCordWhole`. use_cord_(context_, absl::MakeCordFromExternal( data, ObjectForCordSubstr>( std::forward(object), data))); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation>>, std::negation, ConverterToCordWhole>>, std::negation< SupportsExternalRefSubstr>>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { ObjectForCordWhole> object_for_cord( std::forward(object)); const absl::string_view moved_data = BytesRef(*object_for_cord); use_cord_(context_, absl::MakeCordFromExternal( moved_data, std::move(object_for_cord))); } void* context_; UseStringViewFunction use_string_view_; UseCordFunction use_cord_; }; template class ConverterToCordSubstr { public: ConverterToCordSubstr(const ConverterToCordSubstr&) = delete; ConverterToCordSubstr& operator=(const ConverterToCordSubstr&) = delete; template void operator()(SubT&& subobject) && { std::move (*this)(std::forward(subobject), substr_); } template void operator()(SubT&& subobject, absl::string_view substr) && { RIEGELI_ASSERT_EQ(substr_.size(), substr.size()) << "ExternalRef: size mismatch"; // The constructor processes the subobject. 
ConverterToCordSubstr converter(std::forward(subobject), substr, context_, use_string_view_, use_cord_); } private: friend class ExternalRef; ABSL_ATTRIBUTE_ALWAYS_INLINE explicit ConverterToCordSubstr(T&& object, absl::string_view substr, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) : substr_(substr), context_(context), use_string_view_(use_string_view), use_cord_(use_cord) { AssertSubstr(object, substr_); if (RiegeliExternalCopy(&object)) { use_string_view_(context_, substr_); ExternalRef::CallOperatorSubstr(std::forward(object), substr_); return; } std::move(*this).Callback(std::forward(object)); } template ::value, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_cord_(context_, ExternalRef::ToCordSubstr(std::forward(object), substr_)); } template < typename DependentT = T, std::enable_if_t>, HasToCordSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_cord_(context_, ExternalRef::ToCordSubstr( absl::remove_cvref_t(object), substr_)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation< HasToCordSubstr>>, HasExternalDelegateSubstr>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { ExternalRef::ExternalDelegateSubstr(std::forward(object), substr_, std::move(*this)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation< HasToCordSubstr>>, std::negation>, HasExternalDelegateSubstr, ConverterToCordSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { ExternalRef::ExternalDelegateSubstr(absl::remove_cvref_t(object), substr_, std::move(*this)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation< HasToCordSubstr>>, std::negation, ConverterToCordSubstr>>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_cord_(context_, absl::MakeCordFromExternal( substr_, ObjectForCordSubstr>( 
std::forward(object), substr_))); } absl::string_view substr_; void* context_; UseStringViewFunction use_string_view_; UseCordFunction use_cord_; }; template class ExternalObjectWhole { public: explicit ExternalObjectWhole(Initializer object) : object_(std::move(object)) {} ~ExternalObjectWhole() { ExternalRef::CallOperatorWhole(std::move(object_)); } T& operator*() { return object_; } const T& operator*() const { return object_; } private: ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS T object_; }; template class ExternalObjectSubstr; template class ExternalObjectSubstr< T, std::enable_if_t::value>> { public: explicit ExternalObjectSubstr(Initializer object, absl::string_view substr) : object_(std::move(object)), substr_(substr) { AssertSubstr(**this, substr); } ~ExternalObjectSubstr() { std::move(object_)(substr_); } T& operator*() { return object_; } const T& operator*() const { return object_; } private: ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS T object_; absl::string_view substr_; }; template class ExternalObjectSubstr::value>> : public ExternalObjectWhole { public: explicit ExternalObjectSubstr(Initializer object, absl::string_view substr) : ExternalObjectSubstr::ExternalObjectWhole(std::move(object)) { AssertSubstr(**this, substr); } }; template struct HasRiegeliToExternalDataWhole : std::false_type {}; template struct HasRiegeliToExternalDataWhole< T, std::enable_if_t>())), ExternalData>>> : std::true_type {}; template struct HasRiegeliToExternalDataSubstr : std::false_type {}; template struct HasRiegeliToExternalDataSubstr< T, std::enable_if_t>(), std::declval())), ExternalData>>> : std::true_type {}; template struct HasRiegeliToExternalStorage : std::false_type {}; template struct HasRiegeliToExternalStorage< T, std::enable_if_t>())), ExternalStorage>>> : std::true_type {}; template struct HasToExternalDataSubstr : std::disjunction, HasRiegeliToExternalStorage> {}; template ::value, int> = 0> static ExternalData ToExternalDataSubstr(T&& object, absl::string_view substr) { 
return RiegeliToExternalData(ExternalRef::Pointer(std::forward(object)), substr); } template < typename T, std::enable_if_t< std::disjunction_v>, HasRiegeliToExternalStorage>, int> = 0> static ExternalData ToExternalDataSubstr(T&& object, absl::string_view substr) { return ExternalData{ RiegeliToExternalStorage(ExternalRef::Pointer(std::forward(object))), substr}; } template struct HasToExternalDataWhole : std::disjunction, HasToExternalDataSubstr> {}; template ::value, int> = 0> static ExternalData ToExternalDataWhole(T&& object) { return RiegeliToExternalData(ExternalRef::Pointer(std::forward(object))); } template < typename T, std::enable_if_t< std::conjunction_v>, HasToExternalDataSubstr>, int> = 0> static ExternalData ToExternalDataWhole(T&& object) { const absl::string_view data = BytesRef(object); return ExternalRef::ToExternalDataSubstr(std::forward(object), data); } template ::value, int> = 0> static ExternalData ToExternalDataWhole( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) { return RiegeliToExternalData(ExternalRef::Pointer(std::forward(object))); } template < typename T, std::enable_if_t< std::conjunction_v>, HasToExternalDataSubstr>, int> = 0> static ExternalData ToExternalDataWhole(T&& object, absl::string_view data) { return ExternalRef::ToExternalDataSubstr(std::forward(object), data); } template class ConverterToExternalDataWhole { public: ConverterToExternalDataWhole(const ConverterToExternalDataWhole&) = delete; ConverterToExternalDataWhole& operator=( const ConverterToExternalDataWhole&) = delete; template < typename SubT, std::enable_if_t, int> = 0> void operator()(SubT&& subobject) && { // The constructor processes the subobject. const absl::string_view data = BytesRef(subobject); ConverterToExternalDataWhole converter( std::forward(subobject), data, context_, use_external_data_); } template void operator()(SubT&& subobject, absl::string_view substr) && { // The constructor processes the subobject. 
ConverterToExternalDataSubstr converter( std::forward(subobject), substr, context_, use_external_data_); } private: friend class ExternalRef; ABSL_ATTRIBUTE_ALWAYS_INLINE explicit ConverterToExternalDataWhole( T&& object, absl::string_view data, void* context, UseExternalDataFunction use_external_data) : context_(context), use_external_data_(use_external_data) { if (RiegeliExternalCopy(&object)) { use_external_data_(context_, ExternalDataCopy(data)); ExternalRef::CallOperatorWhole(std::forward(object)); return; } std::move(*this).Callback(std::forward(object), data); } template < typename DependentT = T, std::enable_if_t::value, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { use_external_data_(context_, ExternalRef::ToExternalDataWhole( std::forward(object), data)); } template >, HasToExternalDataWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { use_external_data_(context_, ExternalRef::ToExternalDataWhole( absl::remove_cvref_t(object))); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v>>, HasExternalDelegateWhole< DependentT, ConverterToExternalDataWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object, absl::string_view data) && { ExternalRef::ExternalDelegateWhole(std::forward(object), data, std::move(*this)); } template >>, std::negation>, HasExternalDelegateWhole, ConverterToExternalDataWhole>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback( T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) && { ExternalRef::ExternalDelegateWhole(absl::remove_cvref_t(object), std::move(*this)); } template >>, std::negation, ConverterToExternalDataWhole>>>, int> = 0> void Callback(T&& object, ABSL_ATTRIBUTE_UNUSED absl::string_view data) { auto* const storage = new ExternalObjectWhole>(std::forward(object)); const absl::string_view moved_data = BytesRef(**storage); use_external_data_( context_, 
ExternalData{ ExternalStorage( storage, [](void* ptr) { delete static_cast>*>( ptr); }), moved_data}); } void* context_; UseExternalDataFunction use_external_data_; }; template class ConverterToExternalDataSubstr { public: ConverterToExternalDataSubstr(const ConverterToExternalDataSubstr&) = delete; ConverterToExternalDataSubstr& operator=( const ConverterToExternalDataSubstr&) = delete; template void operator()(SubT&& subobject) && { std::move (*this)(std::forward(subobject), substr_); } template void operator()(SubT&& subobject, absl::string_view substr) && { RIEGELI_ASSERT_EQ(substr_.size(), substr.size()) << "ExternalRef: size mismatch"; // The constructor processes the subobject. ConverterToExternalDataSubstr converter( std::forward(subobject), substr, context_, use_external_data_); } private: friend class ExternalRef; ABSL_ATTRIBUTE_ALWAYS_INLINE explicit ConverterToExternalDataSubstr( T&& object, absl::string_view substr, void* context, UseExternalDataFunction use_external_data) : substr_(substr), context_(context), use_external_data_(use_external_data) { AssertSubstr(object, substr_); if (RiegeliExternalCopy(&object)) { use_external_data_(context_, ExternalDataCopy(substr_)); ExternalRef::CallOperatorSubstr(std::forward(object), substr_); return; } std::move(*this).Callback(std::forward(object)); } template < typename DependentT = T, std::enable_if_t::value, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_external_data_(context_, ExternalRef::ToExternalDataSubstr( std::forward(object), substr_)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation>, HasToExternalDataSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_external_data_(context_, ExternalRef::ToExternalDataSubstr( absl::remove_cvref_t(object), substr_)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v>>, HasExternalDelegateSubstr< DependentT, ConverterToExternalDataSubstr>>, 
int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { ExternalRef::ExternalDelegateSubstr(std::forward(object), substr_, std::move(*this)); } template < typename DependentT = T, std::enable_if_t< std::conjunction_v< std::negation< HasToExternalDataSubstr>>, std::negation>, HasExternalDelegateSubstr, ConverterToExternalDataSubstr>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { ExternalRef::ExternalDelegateSubstr(absl::remove_cvref_t(object), substr_, std::move(*this)); } template >>, std::negation, ConverterToExternalDataSubstr>>>, int> = 0> ABSL_ATTRIBUTE_ALWAYS_INLINE void Callback(T&& object) && { use_external_data_( context_, ExternalData{ ExternalStorage( new ExternalObjectSubstr>( std::forward(object), substr_), [](void* ptr) { delete static_cast>*>( ptr); }), substr_}); } absl::string_view substr_; void* context_; UseExternalDataFunction use_external_data_; }; class StorageBase { protected: StorageBase() = default; StorageBase(const StorageBase&) = delete; StorageBase& operator=(const StorageBase&) = delete; virtual ~StorageBase() = default; void Initialize(absl::string_view substr) { substr_ = substr; } private: friend class ExternalRef; // Converts the external object either to `absl::string_view` or // `Chain::Block` by calling once either `use_string_view` or // `use_chain_block`. virtual void ToChainBlock(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) && = 0; // Converts the external object either to `absl::string_view` or // `absl::Cord` by calling once either `use_string_view` or `use_cord`. virtual void ToCord(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) && = 0; // Converts the external object to `ExternalData` by calling once // `use_external_data`. 
virtual void ToExternalData( void* context, UseExternalDataFunction use_external_data) && = 0; bool empty() const { return substr_.empty(); } const char* data() const { return substr_.data(); } size_t size() const { return substr_.size(); } absl::string_view substr() const { return substr_; } absl::string_view substr_; }; template class StorageWholeWithoutCallOperator final : public StorageBase { public: StorageWholeWithoutCallOperator() = default; StorageWholeWithoutCallOperator(const StorageWholeWithoutCallOperator&) = delete; StorageWholeWithoutCallOperator& operator=( const StorageWholeWithoutCallOperator&) = delete; private: friend class ExternalRef; void Initialize(Initializer object) { object_.emplace( std::move(object).Reference(std::move(temporary_storage_))); StorageBase::Initialize(BytesRef(*object_)); } void ToChainBlock(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToChainBlockWhole converter(*std::move(object_), substr(), context, use_string_view, use_chain_block); } void ToCord(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToCordWhole converter(*std::move(object_), substr(), context, use_string_view, use_cord); } void ToExternalData(void* context, UseExternalDataFunction use_external_data) && override { // The constructor processes the object. 
ConverterToExternalDataWhole converter(*std::move(object_), substr(), context, use_external_data); } TemporaryStorage object_; ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS TemporaryStorage temporary_storage_; }; template class StorageWholeWithCallOperator final : public StorageBase { public: StorageWholeWithCallOperator() = default; StorageWholeWithCallOperator(const StorageWholeWithCallOperator&) = delete; StorageWholeWithCallOperator& operator=( const StorageWholeWithCallOperator&) = delete; ~StorageWholeWithCallOperator() { if (object_ != nullptr) { ExternalRef::CallOperatorSubstr(std::forward(*object_), substr()); } } private: friend class ExternalRef; void Initialize(Initializer object) { T&& reference = std::move(object).Reference(std::move(temporary_storage_)); object_ = &reference; StorageBase::Initialize(BytesRef(*object_)); } void ToChainBlock(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToChainBlockWhole converter( ExtractObject(), substr(), context, use_string_view, use_chain_block); } void ToCord(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToCordWhole converter(ExtractObject(), substr(), context, use_string_view, use_cord); } void ToExternalData(void* context, UseExternalDataFunction use_external_data) && override { // The constructor processes the object. 
ConverterToExternalDataWhole converter(ExtractObject(), substr(), context, use_external_data); } T&& ExtractObject() { return std::forward(*std::exchange(object_, nullptr)); } std::remove_reference_t* object_ = nullptr; ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS TemporaryStorage temporary_storage_; }; template class StorageSubstrWithoutCallOperator final : public StorageBase { private: friend class ExternalRef; using T = TargetRefT; void Initialize(Arg arg, absl::string_view substr) { StorageBase::Initialize(substr); arg_.emplace(std::forward(arg)); } void ToChainBlock(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToChainBlockSubstr converter( initializer().Reference(), substr(), context, use_string_view, use_chain_block); } void ToCord(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToCordSubstr converter(initializer().Reference(), substr(), context, use_string_view, use_cord); } void ToExternalData(void* context, UseExternalDataFunction use_external_data) && override { // The constructor processes the object. 
ConverterToExternalDataSubstr converter( initializer().Reference(), substr(), context, use_external_data); } Initializer initializer() { return std::forward(*arg_); } TemporaryStorage arg_; }; template class StorageSubstrWithCallOperator final : public StorageBase { public: StorageSubstrWithCallOperator() = default; StorageSubstrWithCallOperator(const StorageSubstrWithCallOperator&) = delete; StorageSubstrWithCallOperator& operator=( const StorageSubstrWithCallOperator&) = delete; ~StorageSubstrWithCallOperator() { if (object_ != nullptr) { ExternalRef::CallOperatorSubstr(std::forward(*object_), substr()); } } private: friend class ExternalRef; void Initialize(Initializer object, absl::string_view substr) { StorageBase::Initialize(substr); T&& reference = std::move(object).Reference(std::move(temporary_storage_)); object_ = &reference; } void ToChainBlock(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseChainBlockFunction use_chain_block) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToChainBlockSubstr converter( ExtractObject(), substr(), context, use_string_view, use_chain_block); } void ToCord(size_t max_bytes_to_copy, void* context, UseStringViewFunction use_string_view, UseCordFunction use_cord) && override { if (size() <= max_bytes_to_copy) { use_string_view(context, substr()); return; } // The constructor processes the object. ConverterToCordSubstr converter(ExtractObject(), substr(), context, use_string_view, use_cord); } void ToExternalData(void* context, UseExternalDataFunction use_external_data) && override { // The constructor processes the object. 
ConverterToExternalDataSubstr converter(ExtractObject(), substr(), context, use_external_data); } T&& ExtractObject() { return std::forward(*std::exchange(object_, nullptr)); } std::remove_reference_t* object_ = nullptr; ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS TemporaryStorage temporary_storage_; }; template struct StorageWholeImpl; template struct StorageWholeImpl< T, std::enable_if_t>::value>> { using type = StorageWholeWithoutCallOperator; }; template struct StorageWholeImpl< T, std::enable_if_t>::value>> { using type = StorageWholeWithCallOperator; }; template struct StorageSubstrImpl; template struct StorageSubstrImpl>>::value>> { using type = StorageSubstrWithoutCallOperator; }; template struct StorageSubstrImpl< Arg, std::enable_if_t< HasCallOperator>>::value>> { using type = StorageSubstrWithCallOperator>; }; public: // The type of the `storage` parameter for the constructor and // `ExternalRef::From()` which take an external object convertible // to `BytesRef`. template using StorageWhole = typename StorageWholeImpl::type; // The type of the `storage` parameter for the constructor and // `ExternalRef::From()` which take an external object and its substring. template using StorageSubstr = typename StorageSubstrImpl::type; // Constructs an `ExternalRef` from an external object or its `Initializer`. // See class comments for expectations on the external object. // // The object must be convertible to `BytesRef`. // // `storage` must outlive usages of the returned `ExternalRef`. template >::value, int> = 0> /*implicit*/ ExternalRef(Arg&& arg ABSL_ATTRIBUTE_LIFETIME_BOUND, StorageWhole>&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : storage_(&storage) { storage.Initialize(std::forward(arg)); } // Constructs an `ExternalRef` from an external object or its `Initializer`. // See class comments for expectations on the external object. // // `substr` must be owned by the object if it gets created or moved. // // `storage` must outlive usages of the returned `ExternalRef`. 
// // The object is not created if an initializer is passed rather than an // already constructed object, the object type does not use the call operator, // and only `absl::string_view` turns out to be needed. Hence `StorageSubstr` // is parameterized by `Arg&&` rather than `TargetRefT`, so that it can // keep the original initializer. template >::value, int> = 0> explicit ExternalRef( Arg&& arg ABSL_ATTRIBUTE_LIFETIME_BOUND, absl::string_view substr, StorageSubstr&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) : storage_(&storage) { storage.Initialize(std::forward(arg), substr); } ExternalRef(ExternalRef&& that) = default; ExternalRef& operator=(ExternalRef&&) = delete; // Like `ExternalRef` constructor, but `RiegeliSupportsExternalRef()` or // `RiegeliSupportsExternalRefWhole()` is not needed. The caller is // responsible for using an appropriate type of the external object. template , BytesRef>, int> = 0> static ExternalRef From(Arg&& arg ABSL_ATTRIBUTE_LIFETIME_BOUND, StorageWhole>&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) { storage.Initialize(std::forward(arg)); return ExternalRef(&storage); } // Like `ExternalRef` constructor, but `RiegeliSupportsExternalRef()` is not // needed. The caller is responsible for using an appropriate type of the // external object. // // The object is not created if an initializer is passed rather than an // already constructed object, the object type does not use the call operator, // and only `absl::string_view` turns out to be needed. Hence `StorageSubstr` // is parameterized by `Arg&&` rather than `TargetRefT`, so that it can // keep the original initializer. template static ExternalRef From( Arg&& arg ABSL_ATTRIBUTE_LIFETIME_BOUND, absl::string_view substr, StorageSubstr&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) { storage.Initialize(std::forward(arg), substr); return ExternalRef(&storage); } // Returns `true` if the data size is 0. bool empty() const { return storage_->empty(); } // Returns the data pointer. 
const char* data() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return storage_->data(); } // Returns the data size. size_t size() const { return storage_->size(); } // Returns the data as `absl::string_view`. // // This `ExternalRef` must outlive usages of the returned `absl::string_view`. /*implicit*/ operator absl::string_view() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return storage_->substr(); } // The data can be converted to `Chain` using: // * `Chain::Chain(ExternalRef)` // * `Chain::Reset(ExternalRef)` or `riegeli::Reset(Chain&, ExternalRef)` // * `Chain::Append(ExternalRef)` // * `Chain::Prepend(ExternalRef)` // Converts the data to `absl::Cord`. explicit operator absl::Cord() && { absl::Cord result; // Destruction of a just default-constructed `absl::Cord` can be optimized // out. Construction in place is more efficient than assignment. result.~Cord(); std::move(*storage_).ToCord( cord_internal::kMaxBytesToCopyToEmptyCord, &result, [](void* context, absl::string_view data) { new (context) absl::Cord(cord_internal::MakeBlockyCord(data)); }, [](void* context, absl::Cord data) { new (context) absl::Cord(std::move(data)); }); return result; } // Supports `riegeli::Reset(absl::Cord&, ExternalRef)`. friend void RiegeliReset(absl::Cord& dest, ExternalRef src) { std::move(src).AssignTo(dest); } // Appends the data to `dest`. void AppendTo(absl::Cord& dest) && { std::move(*storage_).ToCord( cord_internal::MaxBytesToCopyToCord(dest), &dest, [](void* context, absl::string_view data) { cord_internal::AppendToBlockyCord(data, *static_cast(context)); }, [](void* context, absl::Cord data) { static_cast(context)->Append(std::move(data)); }); } // Prepends the data to `dest`. 
void PrependTo(absl::Cord& dest) && { std::move(*storage_).ToCord( cord_internal::MaxBytesToCopyToCord(dest), &dest, [](void* context, absl::string_view data) { cord_internal::PrependToBlockyCord( data, *static_cast(context)); }, [](void* context, absl::Cord data) { static_cast(context)->Prepend(std::move(data)); }); } // Returns a type-erased external object with its deleter and data. explicit operator ExternalData() && { ExternalData result{ExternalStorage(nullptr, nullptr), absl::string_view()}; // Destruction of just constructed `ExternalData` can be optimized out. // Construction in place is more efficient than assignment. result.~ExternalData(); std::move(*storage_).ToExternalData( &result, [](void* context, ExternalData data) { new (context) ExternalData(std::move(data)); }); return result; } private: // For `InitializeTo()`, `AssignTo()`, `AppendTo()`, and `PrependTo()`. friend class Chain; explicit ExternalRef(StorageBase* storage) : storage_(storage) {} // Assigns the data to `dest` which is expected to be just // default-constructed. void InitializeTo(Chain& dest) && { // Destruction of a just default-constructed `Chain` can be optimized out. // Construction in place is more efficient than assignment. dest.~Chain(); std::move(*storage_).ToChainBlock( Chain::kMaxBytesToCopyToEmpty, &dest, [](void* context, absl::string_view data) { new (context) Chain(data); }, [](void* context, Chain::Block data) { new (context) Chain(std::move(data)); }); } // Assigns the data to `dest`. void AssignTo(Chain& dest) && { std::move(*storage_).ToChainBlock( Chain::kMaxBytesToCopyToEmpty, &dest, [](void* context, absl::string_view data) { static_cast(context)->Reset(data); }, [](void* context, Chain::Block data) { static_cast(context)->Reset(std::move(data)); }); } // Assigns the data to `dest`. 
void AssignTo(absl::Cord& dest) && { std::move(*storage_).ToCord( cord_internal::kMaxBytesToCopyToEmptyCord, &dest, [](void* context, absl::string_view data) { cord_internal::AssignToBlockyCord(data, *static_cast(context)); }, [](void* context, absl::Cord data) { *static_cast(context) = std::move(data); }); } // Appends the data to `dest`. void AppendTo(Chain& dest) && { std::move(*storage_).ToChainBlock( dest.MaxBytesToCopy(), &dest, [](void* context, absl::string_view data) { static_cast(context)->Append(data); }, [](void* context, Chain::Block data) { static_cast(context)->Append(std::move(data)); }); } void AppendTo(Chain& dest, Chain::Options options) && { ChainWithOptions chain_with_options = {&dest, options}; std::move(*storage_).ToChainBlock( dest.MaxBytesToCopy(options), &chain_with_options, [](void* context, absl::string_view data) { static_cast(context)->dest->Append( data, static_cast(context)->options); }, [](void* context, Chain::Block data) { static_cast(context)->dest->Append( std::move(data), static_cast(context)->options); }); } // Prepends the data to `dest`. 
void PrependTo(Chain& dest) && { std::move(*storage_).ToChainBlock( dest.MaxBytesToCopy(), &dest, [](void* context, absl::string_view data) { static_cast(context)->Prepend(data); }, [](void* context, Chain::Block data) { static_cast(context)->Prepend(std::move(data)); }); } void PrependTo(Chain& dest, Chain::Options options) && { ChainWithOptions chain_with_options = {&dest, options}; std::move(*storage_).ToChainBlock( dest.MaxBytesToCopy(options), &chain_with_options, [](void* context, absl::string_view data) { static_cast(context)->dest->Prepend( data, static_cast(context)->options); }, [](void* context, Chain::Block data) { static_cast(context)->dest->Prepend( std::move(data), static_cast(context)->options); }); } struct ChainWithOptions { Chain* dest; Chain::Options options; }; StorageBase* storage_; }; } // namespace riegeli #endif // RIEGELI_BASE_EXTERNAL_REF_BASE_H_ ================================================ FILE: riegeli/base/external_ref_support.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_EXTERNAL_REF_SUPPORT_H_ #define RIEGELI_BASE_EXTERNAL_REF_SUPPORT_H_ #include #include #include #include #include #include #include "absl/base/attributes.h" #include "riegeli/base/bytes_ref.h" #include "riegeli/base/external_data.h" namespace riegeli { // Default implementation for `ExternalRef` support. 
inline bool RiegeliExternalCopy(ABSL_ATTRIBUTE_UNUSED const void* self) { return false; } // Indicates support for `ExternalRef(std::string&&)`. void RiegeliSupportsExternalRefWhole(std::string*); // Indicates support for: // * `ExternalRef(std::vector&&)` // * `ExternalRef(std::vector&&, substr)` template void RiegeliSupportsExternalRef(std::vector*); // Indicates support for `ExternalRef(std::unique_ptr&&, substr)`. template void RiegeliSupportsExternalRef(std::unique_ptr*); template inline ExternalStorage RiegeliToExternalStorage(std::unique_ptr* self) { return ExternalStorage(const_cast*>(self->release()), [](void* ptr) { delete static_cast(ptr); }); } template inline ExternalStorage RiegeliToExternalStorage(std::unique_ptr* self) { return ExternalStorage(const_cast*>(self->release()), [](void* ptr) { delete[] static_cast(ptr); }); } // Indicates support for: // * `ExternalRef(const std::shared_ptr&, substr)` // * `ExternalRef(std::shared_ptr&&, substr)` template void RiegeliSupportsExternalRef(const std::shared_ptr*); namespace external_ref_internal { template struct PointerType { using type = T*; }; template struct PointerType { using type = const T*; }; template struct PointerType { using type = T*; }; template using PointerTypeT = typename PointerType::type; template struct HasRiegeliSupportsExternalRefWhole : std::false_type {}; template struct HasRiegeliSupportsExternalRefWhole< T, std::void_t>()))>> : std::true_type {}; template struct HasRiegeliSupportsExternalRef : std::false_type {}; template struct HasRiegeliSupportsExternalRef< T, std::void_t>()))>> : std::true_type {}; } // namespace external_ref_internal template struct SupportsExternalRefWhole : std::conjunction< std::disjunction< external_ref_internal::HasRiegeliSupportsExternalRefWhole, external_ref_internal::HasRiegeliSupportsExternalRef>, std::is_convertible> {}; template struct SupportsExternalRefSubstr : external_ref_internal::HasRiegeliSupportsExternalRef {}; } // namespace riegeli #endif 
// RIEGELI_BASE_EXTERNAL_REF_SUPPORT_H_ ================================================ FILE: riegeli/base/global.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_GLOBAL_H_ #define RIEGELI_BASE_GLOBAL_H_ #include #include #include #include "absl/base/nullability.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `Global()` returns a const reference to a default-constructed object of // type `T`. // // All calls with the given `T` type return a reference to the same object. // // The object is created when `Global` is first called with the given `T` type, // and is never destroyed. template , int> = 0> const T& Global(); // `Global(construct)` returns a reference to an object returned by `construct`. // // The object is created when `Global` is first called with the given // `construct` type, and is never destroyed. // // If `T` is not const-qualified, this is recommended only when the object is // thread-safe, or when it will be accessed only in a thread-safe way despite // its non-const type. // // The `construct` type should be a lambda with no captures. This restriction is // a safeguard against making the object dependent on local state, which would // be misleadingly ignored for subsequent calls. Since distinct lambdas have // distinct types, distinct call sites with lambdas return references to // distinct objects. 
// NOTE(review): template parameter lists below were destroyed in extraction
// (`<...>` stripped); they are reconstructed from the surviving tokens and the
// matching definitions later in the file — verify against upstream sources.
template <typename Construct,
          std::enable_if_t<std::conjunction_v<std::is_empty<Construct>,
                                              std::is_invocable<Construct>>,
                           int> = 0>
std::decay_t<std::invoke_result_t<Construct>>& Global(Construct construct);

// `Global(construct, initialize)` returns a reference to an object returned by
// `construct`. After construction, `initialize` is called on the reference.
//
// The object is created when `Global` is first called with the given
// `construct` and `initialize` types, and is never destroyed.
//
// If `T` is not const-qualified, this is recommended only when the object is
// thread-safe, or when it will be accessed only in a thread-safe way despite
// its non-const type.
//
// The `construct` and `initialize` types should be lambdas with no captures.
// This restriction is a safeguard against making the object dependent on local
// state, which would be misleadingly ignored for subsequent calls. Since
// distinct lambdas have distinct types, distinct call sites with lambdas return
// references to distinct objects.
template <
    typename Construct, typename Initialize,
    std::enable_if_t<
        std::conjunction_v<
            std::is_empty<Construct>, std::is_empty<Initialize>,
            std::is_invocable<Construct>,
            std::is_invocable<Initialize,
                              std::decay_t<std::invoke_result_t<Construct>>&>>,
        int> = 0>
std::decay_t<std::invoke_result_t<Construct>>& Global(Construct construct,
                                                      Initialize initialize);

// Implementation details follow.
namespace global_internal { template class NoDestructor { public: NoDestructor() { new (storage_) T(); } template explicit NoDestructor(Construct construct) { new (storage_) T(std::invoke(construct)); } template explicit NoDestructor(Construct construct, Initialize initialize) { new (storage_) T(std::invoke(construct)); std::invoke(initialize, object()); } NoDestructor(const NoDestructor&) = delete; NoDestructor& operator=(const NoDestructor&) = delete; T& object() { return *std::launder(reinterpret_cast(storage_)); } private: alignas(T) char storage_[sizeof(T)]; }; } // namespace global_internal template , int>> inline const T& Global() { static global_internal::NoDestructor kStorage; return kStorage.object(); } template , std::is_invocable>, int>> inline std::decay_t>& Global( Construct construct) { static global_internal::NoDestructor< std::decay_t>> kStorage(construct); return kStorage.object(); } template < typename Construct, typename Initialize, std::enable_if_t< std::conjunction_v< std::is_empty, std::is_empty, std::is_invocable>&>>, int>> inline std::decay_t>& Global( Construct construct, Initialize initialize) { static global_internal::NoDestructor< std::decay_t>> kStorage(construct, initialize); return kStorage.object(); } } // namespace riegeli #endif // RIEGELI_BASE_GLOBAL_H_ ================================================ FILE: riegeli/base/hybrid_direct_common.h ================================================ // Copyright 2026 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef RIEGELI_BASE_HYBRID_DIRECT_COMMON_H_
#define RIEGELI_BASE_HYBRID_DIRECT_COMMON_H_

// IWYU pragma: private, include "riegeli/base/hybrid_direct_map.h"
// IWYU pragma: private, include "riegeli/base/hybrid_direct_set.h"

// NOTE(review): the header names in the `#include <...>` lines below (and the
// contents of template parameter lists throughout) were stripped by text
// extraction and must be restored before compiling.
#include
#include

#include "absl/base/nullability.h"

ABSL_POINTERS_DEFAULT_NONNULL

namespace riegeli {
namespace hybrid_direct_internal {

// SFINAE detector: true iff ADL finds a usable
// `RiegeliHybridDirectToRawKey()` overload for `Key`.
template struct HasRiegeliHybridDirectToRawKey : std::false_type {};
template struct HasRiegeliHybridDirectToRawKey< Key, std::enable_if_t()))>>> : std::true_type {};

// SFINAE detector: true iff ADL finds a usable
// `RiegeliHybridDirectFromRawKey()` overload for `Key` returning `Key`.
// Needed only for iterators.
template struct HasRiegeliHybridDirectFromRawKey : std::false_type {};
template struct HasRiegeliHybridDirectFromRawKey< Key, std::enable_if_t()), static_cast(nullptr))), Key>>> : std::true_type {};

}  // namespace hybrid_direct_internal

// The default `Traits` parameter for `HybridDirectMap` and `HybridDirectSet`,
// which specifies a mapping of keys to an unsigned integer type.
//
// Key types supported by default are integral types, enum types, and types
// supporting `RiegeliHybridDirectToRawKey()` as below. The latter takes
// precedence.
//
// To override `HybridDirectTraits` for a type `Key`, define a free function
// `friend RawKey RiegeliHybridDirectToRawKey(Key key)` as a friend of `Key`
// inside class definition or in the same namespace as `Key`, so that it can be
// found via ADL. Different `Key` values must yield different `RawKey` values.
//
// Optionally, define also a free function
// `friend Key RiegeliHybridDirectFromRawKey(RawKey raw_key, Key*)`.
// This is needed only for iterators.
//
// The second argument of `RiegeliHybridDirectFromRawKey()` is always a null
// pointer, used to choose the right overload based on the type.
//
// `expected_min_key` is the expected lower bound of keys. Keys smaller than
// that are never put in the array. `expected_min_key` has a type which supports
// `static_cast(expected_min_key)`.
//
// Primary template; only the specializations below are defined.
template struct HybridDirectTraits;

// Specialization for keys providing `RiegeliHybridDirectToRawKey()` via ADL.
template struct HybridDirectTraits< Key, expected_min_key, std::enable_if_t< hybrid_direct_internal::HasRiegeliHybridDirectToRawKey::value>> {
 private:
  using RawKey = decltype(RiegeliHybridDirectToRawKey(std::declval()));

 public:
  // Shifts the raw key down by `expected_min_key` so that expected keys start
  // near zero.
  static RawKey ToRawKey(Key key) {
    // Wrap-around is not an error.
    return static_cast(RiegeliHybridDirectToRawKey(key) - static_cast(expected_min_key));
  }

  // Available only if `RiegeliHybridDirectFromRawKey()` is defined for `Key`
  // (needed only for iterators).
  template < typename DependentKey = Key, std::enable_if_t::value, int> = 0>
  static Key FromRawKey(RawKey raw_key) {
    // Wrap-around is not an error.
    return RiegeliHybridDirectFromRawKey( static_cast(raw_key + static_cast(expected_min_key)), static_cast(nullptr));
  }
};

// Specialization for integral keys (when the ADL customization is absent).
template struct HybridDirectTraits< Key, expected_min_key, std::enable_if_t>, std::is_integral>>> {
 private:
  using RawKey = std::make_unsigned_t;

 public:
  static RawKey ToRawKey(Key key) {
    // Wrap-around is not an error.
    return static_cast(static_cast(key) - static_cast(expected_min_key));
  }
  static Key FromRawKey(RawKey raw_key) {
    // Wrap-around is not an error.
    return static_cast( static_cast(raw_key + static_cast(expected_min_key)));
  }
};

// Specialization for enum keys (when the ADL customization is absent); maps
// through the enum's underlying type.
template struct HybridDirectTraits< Key, expected_min_key, std::enable_if_t>, std::is_enum>>> {
 private:
  using RawKey = std::make_unsigned_t>;

 public:
  static RawKey ToRawKey(Key key) {
    // Wrap-around is not an error.
    return static_cast(static_cast(key) - static_cast(expected_min_key));
  }
  static Key FromRawKey(RawKey raw_key) {
    // Wrap-around is not an error.
    return static_cast( static_cast(raw_key + static_cast(expected_min_key)));
  }
};

// The default `direct_capacity` parameter for `HybridDirectMap` and
// `HybridDirectSet` building.
constexpr size_t kHybridDirectDefaultDirectCapacity = 128;

}  // namespace riegeli

#endif  // RIEGELI_BASE_HYBRID_DIRECT_COMMON_H_

================================================
FILE: riegeli/base/hybrid_direct_internal.h
================================================

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef RIEGELI_BASE_HYBRID_DIRECT_INTERNAL_H_
#define RIEGELI_BASE_HYBRID_DIRECT_INTERNAL_H_

// NOTE(review): the `<...>` header names below (and template parameter lists
// throughout) were stripped by text extraction and must be restored before
// compiling.
#include
#include
#include
#include
#include
#include

#include "absl/base/attributes.h"
#include "absl/base/nullability.h"
#include "absl/base/optimization.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/compare.h"

ABSL_POINTERS_DEFAULT_NONNULL

namespace riegeli::hybrid_direct_internal {

// Wraps a `T` which is constructed explicitly later, rather than when
// `DelayedConstructor` is constructed.
//
// In contrast to `std::optional`, this avoids the overhead of tracking
// whether the object has been constructed, at the cost of passing this
// responsibility to the caller.
//
// Either `emplace()` or `Abandon()` must be called exactly once.
// If `emplace()` is called, the regular destructor should be called later.
// If `Abandon()` is called, the regular destructor must not be called.
template class DelayedConstructor {
 public:
  // Does not construct the wrapped object yet.
  DelayedConstructor() noexcept {}

  DelayedConstructor(const DelayedConstructor&) = delete;
  DelayedConstructor& operator=(const DelayedConstructor&) = delete;

  // Destroys the wrapped object. It must have been constructed.
  ~DelayedConstructor() { value_.~T(); }

  // Constructs the wrapped object. It must not have been constructed yet.
  template , int> = 0>
  T& emplace(Args&&... args) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    new (&value_) T(std::forward(args)...);
    return value_;
  }

  // Destroys the `DelayedConstructor`. The wrapped object must not have been
  // constructed. This is needed for `SizedArray`.
  void Abandon() {}

  // Returns the wrapped object. It must have been constructed.
  T& operator*() ABSL_ATTRIBUTE_LIFETIME_BOUND { return value_; }
  const T& operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return value_; }

 private:
  // The union suppresses implicit construction and destruction of `value_`.
  union { T value_; };
};

// A deleter for `SizedArray`.
//
// If `supports_abandon` is true, truncation with `AbandonAfter()` is supported,
// at the cost of some overhead.
//
// A moved-from `SizedDeleter` reports a positive size. This helps to trigger
// a null pointer dereference when a moved-from `SizedArray` is used.
template class SizedDeleter {
 public:
  // The largest element count representable in `size_`.
  static size_t max_size() { return kSizeMask / sizeof(T); }

  SizedDeleter() = default;
  explicit SizedDeleter(size_t size) : size_(size) {}

  // Moving poisons the source size (see `kPoisonedSize` below).
  SizedDeleter(SizedDeleter&& that) noexcept : size_(std::exchange(that.size_, kPoisonedSize)) {}
  SizedDeleter& operator=(SizedDeleter&& that) noexcept {
    size_ = std::exchange(that.size_, kPoisonedSize);
    return *this;
  }

  // Destroys the elements in reverse order, then deallocates the storage.
  void operator()(T* ptr) const {
    for (T* iter = ptr + (size_ & kSizeMask); iter != ptr;) {
      --iter;
      iter->~T();
    }
    if (ABSL_PREDICT_FALSE((size_ & kOverallocated) != 0)) {
      // The allocated size is not tracked and sized delete must not be used.
// (Continuation of `SizedDeleter::operator()` from the previous line:
// unsized deallocation because the allocation was marked `kOverallocated`.)
      if constexpr (alignof(T) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
        operator delete[](ptr);
      } else {
        operator delete[](ptr, std::align_val_t(alignof(T)));
      }
      return;
    }
    // Sized delete: `size_` holds the full element count of the allocation.
    if constexpr (alignof(T) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
      operator delete[](ptr, size_ * sizeof(T));
    } else {
      operator delete[](ptr, size_ * sizeof(T), std::align_val_t(alignof(T)));
    }
  }

  // The number of elements (with the `kOverallocated` flag masked off).
  size_t size() const { return size_ & kSizeMask; }

  // If the pointer associated with this deleter is `nullptr`, returns `true`
  // when this deleter is moved-from. Otherwise the result is meaningless.
  bool IsMovedFromIfNull() const { return size_ == kPoisonedSize; }

  // Reduces the size to `new_size`. Calls `Abandon()` on elements being
  // abandoned. The regular destructor will not be called for them.
  //
  // `SizedDeleter` is optimized for the case when `AbandonAfter()` is never
  // called with a changed size.
  template = 0>
  void AbandonAfter(T* ptr, size_t new_size) {
    RIEGELI_ASSERT_LE(new_size, size_ & kSizeMask)
        << "Failed precondition of SizedDeleter::AbandonAfter(): "
           "array size overflow";
    if (ABSL_PREDICT_TRUE(new_size == size_)) return;
    T* const new_end = ptr + new_size;
    for (T* iter = ptr + (size_ & kSizeMask); iter != new_end;) {
      --iter;
      iter->Abandon();
    }
    // Mark that sized delete may no longer be used.
    size_ = new_size | kOverallocated;
  }

 private:
  // A moved-from `SizedDeleter` has `size_ == kPoisonedSize`. In debug mode
  // this asserts against using a moved-from object. In non-debug mode, if the
  // key is not too large, then this triggers a null pointer dereference with an
  // offset up to 1MB, which is assumed to reliably crash.
  static constexpr size_t kPoisonedSize = (size_t{1} << 20) / sizeof(T);

  // If `supports_abandon` is true, `size_` tracks the current size and whether
  // the original size has been reduced with `AbandonAfter()`. In that case
  // sized delete is not called because the allocated size is not tracked.
  static constexpr size_t kSizeMask = std::numeric_limits::max() >> (supports_abandon ? 1 : 0);
  static constexpr size_t kOverallocated = ~kSizeMask;

  // The number of elements. If marked with `kOverallocated`, the allocated size
  // is not tracked and sized delete must not be used.
  size_t size_ = 0;
};

// Like `std::unique_ptr`, but the size is stored in the deleter.
// It is available as `get_deleter().size()` and used for sized delete.
//
// If `supports_abandon` is true, truncation with `get_deleter().AbandonAfter()`
// is supported, at the cost of some overhead.
//
// A moved-from `SizedArray` is `nullptr` but reports a positive size. This
// helps to trigger a null pointer dereference when a moved-from `SizedArray`
// is used.
template using SizedArray = std::unique_ptr>;

// Like `std::make_unique(size)`.
//
// If `supports_abandon` is true, truncation with `get_deleter().AbandonAfter()`
// is supported, at the cost of some overhead.
template inline SizedArray MakeSizedArray(size_t size) {
  T* ptr;
  // Allocates raw storage; elements are constructed below and later destroyed
  // by `SizedDeleter`.
  if constexpr (alignof(T) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
    ptr = static_cast(operator new[](size * sizeof(T)));
  } else {
    ptr = static_cast(operator new[](size * sizeof(T), std::align_val_t(alignof(T))));
  }
  T* const end = ptr + size;
  // Value-initializes each element (`T()`).
  for (T* iter = ptr; iter != end; ++iter) {
    new (iter) T();
  }
  return SizedArray( ptr, SizedDeleter(size));
}

// Like `std::make_unique_for_overwrite(size)`.
//
// If `supports_abandon` is true, truncation with `get_deleter().AbandonAfter()`
// is supported, at the cost of some overhead.
template inline SizedArray MakeSizedArrayForOverwrite(size_t size) {
  T* ptr;
  if constexpr (alignof(T) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) {
    ptr = static_cast(operator new[](size * sizeof(T)));
  } else {
    ptr = static_cast(operator new[](size * sizeof(T), std::align_val_t(alignof(T))));
  }
  T* const end = ptr + size;
  // Default-initializes each element (`T` without parentheses), i.e. no
  // zeroing for trivial types.
  for (T* iter = ptr; iter != end; ++iter) {
    new (iter) T;
  }
  return SizedArray( ptr, SizedDeleter(size));
}

// Performs an assignment, but the behavior is undefined if the old value of the
// destination is not null.
// This allows the compiler to skip generating the code
// which deletes the old value.
//
// This is meant for initializing member variables of smart pointer types in
// functions where the compiler cannot determine itself that the old value is
// always null.
template inline void AssignToAssumedNull(Dest& dest, Src&& src) {
  RIEGELI_ASSUME_EQ(dest, nullptr)
      << "Failed precondition of AssignToAssumedNull(): "
         "old value of destination is not null";
  dest = std::forward(src);
}

// An iterator over a sequence of consecutive indices. Does not support the full
// iterator API, only what is needed by `HybridDirectMap` and `HybridDirectSet`.
template class IndexIterator : public WithEqual> {
 public:
  explicit IndexIterator(Index index) : index_(index) {}

  IndexIterator(const IndexIterator&) = default;
  IndexIterator& operator=(const IndexIterator&) = default;

  // Returns the current index.
  Index operator*() const { return index_; }

  IndexIterator& operator++() {
    ++index_;
    return *this;
  }

  friend bool operator==(const IndexIterator& a, const IndexIterator& b) {
    return a.index_ == b.index_;
  }

 private:
  Index index_;
};

}  // namespace riegeli::hybrid_direct_internal

#endif  // RIEGELI_BASE_HYBRID_DIRECT_INTERNAL_H_

================================================
FILE: riegeli/base/hybrid_direct_map.h
================================================

// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef RIEGELI_BASE_HYBRID_DIRECT_MAP_H_
#define RIEGELI_BASE_HYBRID_DIRECT_MAP_H_

// NOTE(review): the `<...>` header names below (and template parameter lists
// throughout) were stripped by text extraction and must be restored before
// compiling.
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "absl/base/attributes.h"
#include "absl/base/nullability.h"
#include "absl/base/optimization.h"
#include "absl/container/flat_hash_map.h"
#include "riegeli/base/arithmetic.h"
#include "riegeli/base/assert.h"
#include "riegeli/base/compare.h"
#include "riegeli/base/debug.h"
#include "riegeli/base/hybrid_direct_common.h"  // IWYU pragma: export
#include "riegeli/base/hybrid_direct_internal.h"
#include "riegeli/base/invoker.h"
#include "riegeli/base/iterable.h"
#include "riegeli/base/type_traits.h"

ABSL_POINTERS_DEFAULT_NONNULL

namespace riegeli {
namespace hybrid_direct_internal {

// Part of `HybridDirectMap` excluding constructors and assignment. This is
// separated to make copy and move constructors and assignment available
// conditionally.
template class HybridDirectMapImpl {
 private:
  template class IteratorImpl;

 public:
  // Standard container member types.
  using key_type = Key;
  using mapped_type = Value;
  using value_type = std::pair;
  using reference = ReferencePair;
  using const_reference = ReferencePair;
  using pointer = ArrowProxy;
  using const_pointer = ArrowProxy;
  using iterator = IteratorImpl;
  using const_iterator = IteratorImpl;
  using size_type = size_t;
  using difference_type = ptrdiff_t;

  static size_t max_size();

  ABSL_ATTRIBUTE_REINITIALIZES void Reset();

  // Returns a pointer to the value associated with `key`, or `nullptr` if `key`
  // is absent.
  //
  // This can be a bit faster than `find()`.
  Value* absl_nullable FindOrNull(Key key) ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const Value* absl_nullable FindOrNull(Key key) const ABSL_ATTRIBUTE_LIFETIME_BOUND;

  // Returns a reference to the value associated with `key`, or a reference to
  // `default_value` if `key` is absent.
  const Value& FindOrDefault( Key key, const Value& default_value ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) const ABSL_ATTRIBUTE_LIFETIME_BOUND;

  iterator find(Key key) ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const_iterator find(Key key) const ABSL_ATTRIBUTE_LIFETIME_BOUND;
  bool contains(Key key) const;

  // Presumably requires `key` to be present (see `KeyNotFound()` below) —
  // TODO(review): confirm against the out-of-line definition.
  Value& at(Key key) ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const Value& at(Key key) const ABSL_ATTRIBUTE_LIFETIME_BOUND;

  // Empty iff the direct part holds no values and the slow map is absent
  // (a non-null `slow_map_` is never empty, per the invariant below).
  bool empty() const {
    return direct_values_.get_deleter().size() == 0 && ABSL_PREDICT_TRUE(slow_map_ == nullptr);
  }
  size_t size() const;

  iterator begin() ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const_iterator begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const_iterator cbegin() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return begin(); }
  iterator end() ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const_iterator end() const ABSL_ATTRIBUTE_LIFETIME_BOUND;
  const_iterator cend() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return end(); }

 protected:
  HybridDirectMapImpl() = default;

  // NOTE(review): a copy constructor/assignment declared `noexcept` is unusual
  // for a copying operation that allocates — confirm this is intended.
  HybridDirectMapImpl(const HybridDirectMapImpl& that) noexcept;
  HybridDirectMapImpl& operator=(const HybridDirectMapImpl& that) noexcept;

  HybridDirectMapImpl(HybridDirectMapImpl&& that) = default;
  HybridDirectMapImpl& operator=(HybridDirectMapImpl&& that) = default;

  // Builds the map from an iterable, applying the projections to each entry.
  template void Initialize(Src&& src, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity);
  // Builds the map from indices [0..`size`), applying the projections to each
  // index.
  template void InitializeByIndex(Index size, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity);

  static bool Equal(const HybridDirectMapImpl& a, const HybridDirectMapImpl& b);

 private:
  using RawKey = std::decay_t()))>;
  static_assert(std::is_unsigned_v);

  using DirectValues = SizedArray, /*supports_abandon=*/true>;
  using DirectMap = SizedArray;
  using SlowMap = absl::flat_hash_map;

  // The direct array is kept at least this full (see `Optimize()`).
  static constexpr int kInverseMinLoadFactor = 4;  // 25%.

  // Distributes the entries between the direct array and the slow map.
  template void Optimize(Iterator first, Iterator last, size_t size, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity);

  // Deep-copy helpers for the copying constructor/assignment.
  absl_nullable DirectValues CopyDirectValues() const;
  absl_nullable DirectMap CopyDirectMap(DelayedConstructor* absl_nullable dest_values) const;
  absl_nullable std::unique_ptr CopySlowMap() const;

  ABSL_ATTRIBUTE_NORETURN static void KeyNotFound(Key key);

  size_t FirstRawKey() const;

  size_t capacity() const {
    return direct_map_.get_deleter().size() + (slow_map_ == nullptr ? 0 : slow_map_->capacity());
  }

  // Stores values for `direct_map_`, in no particular order.
  absl_nullable DirectValues direct_values_;

  // Indexed by raw key below `direct_map_.get_deleter().size()`. Elements
  // corresponding to present values point to elements of `direct_values_`.
  // The remaining elements are `nullptr`.
  absl_nullable DirectMap direct_map_;

  // If not `nullptr`, stores the mapping for keys too large for `direct_map_`.
  // Uses `std::unique_ptr` rather than `std::optional` to reduce memory usage
  // in the common case when `slow_map_` is not used.
  //
  // Invariant: if `slow_map_ != nullptr` then `!slow_map_->empty()`.
  absl_nullable std::unique_ptr slow_map_;
};

}  // namespace hybrid_direct_internal

// `HybridDirectMap` is a map optimized for keys being mostly small integers
// or enums, especially if they are dense near zero. It supports only lookups
// and iteration, but no incremental modification.
//
// It stores a part of the map covering some range of small keys in an array
// of pointers to values, directly indexed by the key. The remaining keys are
// stored in an `absl::flat_hash_map`.
//
// `Traits` specifies a mapping of keys to an unsigned integer type. It must
// support at least the following static members:
//
// ```
// // Translates the key to a raw key, which is an unsigned integer type.
// // Different `Key` values must yield different `RawKey` values. Small raw
// // keys are put in the array.
// static RawKey ToRawKey(Key key);
//
// // Translates the raw key back to a key.
// //
// // This is optional. Needed only for iterators.
// static Key FromRawKey(RawKey raw_key);
// ```
//
// `direct_capacity`, if specified during building, is the intended capacity
// of the array part. The actual capacity can be smaller if all keys fit
// in the array, or larger if the array remains at least 25% full. Default:
// `kHybridDirectDefaultDirectCapacity` (128).
//
// In the case of duplicate keys, the first value wins.
template > class HybridDirectMap
    : public hybrid_direct_internal::HybridDirectMapImpl,
      public ConditionallyConstructible, true>,
      public ConditionallyAssignable, true>,
      public WithEqual> {
 private:
  // SFINAE detectors used to select among the constructor and `Reset()`
  // overloads below.

  // `Src` iterates over pairs whose `.first` is usable as `Key`.
  template struct HasCompatibleKeys : std::false_type {};
  template struct HasCompatibleKeys< Src, std::enable_if_t>().first), Key>>> : std::true_type {};

  // `Src` iterates over pairs whose `.second` is usable as `Value`.
  template struct HasCompatibleValues : std::false_type {};
  template struct HasCompatibleValues< Src, std::enable_if_t>().second), Value>>> : std::true_type {};

  // `key_projection(element)` yields a `Key`.
  template struct HasProjectableKeys : std::false_type {};
  template struct HasProjectableKeys< Src, KeyProjection, std::enable_if_t>, Key>>> : std::true_type {};

  // `value_projection(element)` yields a `Value`.
  template struct HasProjectableValues : std::false_type {};
  template struct HasProjectableValues< Src, ValueProjection, std::enable_if_t>, Value>>> : std::true_type {};

  // `key_projection(index)` yields a `Key`.
  template struct HasGeneratableKeys : std::false_type {};
  template struct HasGeneratableKeys< Index, KeyProjection, std::enable_if_t, Key>>> : std::true_type {};

  // `value_projection(index)` yields a `Value`.
  template struct HasGeneratableValues : std::false_type {};
  template struct HasGeneratableValues< Index, ValueProjection, std::enable_if_t, Value>>> : std::true_type {};

  // Default projections extract `.first` / `.second` of each entry.
  template struct DefaultKeyProjection {
    Key operator()(ElementTypeT entry) const { return entry.first; }
  };
  template struct DefaultValueProjection {
    auto&& operator()(ElementTypeT&& entry) const {
      return std::forward>(entry).second;
    }
  };

 public:
  // Constructs an empty `HybridDirectMap`.
  HybridDirectMap() = default;

  // Builds `HybridDirectMap` from an iterable `src`. Moves values if `src` is
  // an rvalue which owns its elements.
  template , IsForwardIterable, HasCompatibleKeys, HasCompatibleValues>, int> = 0>
  explicit HybridDirectMap(Src&& src) {
    this->Initialize(std::forward(src), DefaultKeyProjection(), DefaultValueProjection(), kHybridDirectDefaultDirectCapacity);
  }
  template , HasCompatibleKeys, HasCompatibleValues>, int> = 0>
  explicit HybridDirectMap(Src&& src, size_t direct_capacity) {
    this->Initialize(std::forward(src), DefaultKeyProjection(), DefaultValueProjection(), direct_capacity);
  }

  // Builds `HybridDirectMap` from an initializer list.
  /*implicit*/ HybridDirectMap( std::initializer_list> src, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Initialize(src, DefaultKeyProjection(), DefaultValueProjection(), direct_capacity);
  }

  // Builds `HybridDirectMap` from an iterable `src`. Moves values if `src` is
  // an rvalue which owns its elements.
  //
  // Keys and values are extracted using `key_projection()` and
  // `value_projection()` rather than `.first` and `.second`. `key_projection()`
  // may be called multiple times for each entry so it should be efficient.
  // `value_projection()` is called once for each entry so it can be expensive.
  template < typename Src, typename KeyProjection = DefaultKeyProjection, std::enable_if_t< std::conjunction_v< std::negation>, IsForwardIterable, HasProjectableKeys, HasCompatibleValues>, int> = 0>
  explicit HybridDirectMap( Src&& src, const KeyProjection& key_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Initialize(std::forward(src), key_projection, DefaultValueProjection(), direct_capacity);
  }
  template < typename Src, typename KeyProjection = DefaultKeyProjection, typename ValueProjection = DefaultValueProjection, std::enable_if_t< std::conjunction_v< std::negation>, std::negation>, IsForwardIterable, HasProjectableKeys, HasProjectableValues>, int> = 0>
  explicit HybridDirectMap( Src&& src, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Initialize(std::forward(src), key_projection, value_projection, direct_capacity);
  }

  // Builds `HybridDirectMap` from keys and values computed by invoking
  // `key_projection()` and `value_projection()` with indices from [0..`size`).
  //
  // `key_projection()` may be called multiple times for each index so it should
  // be efficient. `value_projection()` is called once for each index so it can
  // be expensive.
  template , std::negation>, std::negation>, HasGeneratableKeys, HasGeneratableValues>, int> = 0>
  explicit HybridDirectMap( Index size, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->InitializeByIndex(size, key_projection, value_projection, direct_capacity);
  }

  HybridDirectMap(const HybridDirectMap& that) = default;
  HybridDirectMap& operator=(const HybridDirectMap& that) = default;
  HybridDirectMap(HybridDirectMap&& that) = default;
  HybridDirectMap& operator=(HybridDirectMap&& that) = default;

  // Makes `*this` equivalent to a newly constructed `HybridDirectMap`.
  using HybridDirectMap::HybridDirectMapImpl::Reset;
  template , HasCompatibleKeys, HasCompatibleValues>, int> = 0>
  ABSL_ATTRIBUTE_REINITIALIZES void Reset( Src&& src, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Reset();
    this->Initialize(std::forward(src), DefaultKeyProjection(), DefaultValueProjection(), direct_capacity);
  }
  ABSL_ATTRIBUTE_REINITIALIZES void Reset( std::initializer_list> src, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Reset();
    this->Initialize(src, DefaultKeyProjection(), DefaultValueProjection(), direct_capacity);
  }
  template < typename Src, typename KeyProjection = DefaultKeyProjection, std::enable_if_t< std::conjunction_v< std::negation>, IsForwardIterable, HasProjectableKeys, HasCompatibleValues>, int> = 0>
  ABSL_ATTRIBUTE_REINITIALIZES void Reset( Src&& src, const KeyProjection& key_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Reset();
    this->Initialize(std::forward(src), key_projection, DefaultValueProjection(), direct_capacity);
  }
  template < typename Src, typename KeyProjection = DefaultKeyProjection, typename ValueProjection = DefaultValueProjection, std::enable_if_t< std::conjunction_v< std::negation>, std::negation>, IsForwardIterable, HasProjectableKeys, HasProjectableValues>, int> = 0>
  ABSL_ATTRIBUTE_REINITIALIZES void Reset( Src&& src, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Reset();
    this->Initialize(std::forward(src), key_projection, value_projection, direct_capacity);
  }
  template , std::negation>, std::negation>, HasGeneratableKeys, HasGeneratableValues>, int> = 0>
  ABSL_ATTRIBUTE_REINITIALIZES void Reset( Index size, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) {
    this->Reset();
    this->InitializeByIndex(size, key_projection, value_projection, direct_capacity);
// (Continuation: closes the last `Reset()` overload from the previous line.)
  }

  // Equality delegates to `HybridDirectMapImpl::Equal()`.
  friend bool operator==(const HybridDirectMap& a, const HybridDirectMap& b) {
    return HybridDirectMap::HybridDirectMapImpl::Equal(a, b);
  }
};

namespace hybrid_direct_internal {

// Iterator over a `HybridDirectMapImpl`: walks the direct array first (with a
// backwards-counting complement index), then the slow map.
template template class HybridDirectMapImpl::IteratorImpl : public WithEqual> {
 public:
  using iterator_concept = std::forward_iterator_tag;
  // `iterator_category` is only `std::input_iterator_tag` because the
  // `LegacyForwardIterator` requirement and above require `reference` to be
  // a true reference type.
  using iterator_category = std::input_iterator_tag;
  using value_type = std::pair;
  using reference = ReferencePair>;
  using pointer = ArrowProxy;
  using difference_type = ptrdiff_t;

  IteratorImpl() = default;

  // Conversion from `iterator` to `const_iterator`.
  template = 0>
  /*implicit*/ IteratorImpl(IteratorImpl that) noexcept
      : direct_map_end_(that.direct_map_end_),
        direct_map_size_(that.direct_map_size_),
        raw_key_complement_(that.raw_key_complement_),
        slow_map_iter_(that.slow_map_iter_) {}

  IteratorImpl(const IteratorImpl& that) = default;
  IteratorImpl& operator=(const IteratorImpl& that) = default;

  reference operator*() const {
    if (ABSL_PREDICT_TRUE(raw_key_complement_ > 0)) {
      // Direct part: the key is reconstructed from the array position.
      return reference{Traits::FromRawKey(IntCast(direct_map_size_ - raw_key_complement_)), **(direct_map_end_ - raw_key_complement_)};
    }
    const auto iter = *slow_map_iter_;
    return reference{Traits::FromRawKey(iter->first), iter->second};
  }

  pointer operator->() const { return pointer(**this); }

  IteratorImpl& operator++() {
    if (ABSL_PREDICT_TRUE(raw_key_complement_ > 0)) {
      // Skip absent entries (`nullptr`) in the direct array.
      do {
        --raw_key_complement_;
        if (ABSL_PREDICT_FALSE(raw_key_complement_ == 0)) break;
      } while (*(direct_map_end_ - raw_key_complement_) == nullptr);
    } else {
      ++*slow_map_iter_;
    }
    return *this;
  }
  IteratorImpl operator++(int) {
    IteratorImpl result = *this;
    ++*this;
    return result;
  }

  friend bool operator==(IteratorImpl a, IteratorImpl b) {
    RIEGELI_ASSERT_EQ(a.direct_map_end_, b.direct_map_end_)
        << "Failed precondition of operator==(HybridDirectMap::iterator): "
           "incomparable iterators";
    RIEGELI_ASSERT_EQ(a.direct_map_size_, b.direct_map_size_)
        << "Failed precondition of operator==(HybridDirectMap::iterator): "
           "incomparable iterators";
    RIEGELI_ASSERT_EQ(a.slow_map_iter_ != std::nullopt, b.slow_map_iter_ != std::nullopt)
        << "Failed precondition of operator==(HybridDirectMap::iterator): "
           "incomparable iterators";
    if (a.raw_key_complement_ != b.raw_key_complement_) return false;
    if (ABSL_PREDICT_TRUE(a.slow_map_iter_ == std::nullopt)) return true;
    return *a.slow_map_iter_ == *b.slow_map_iter_;
  }

 private:
  friend class HybridDirectMapImpl;

  // Positions the iterator within the direct part only.
  explicit IteratorImpl(std::conditional_t map ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t raw_key_complement)
      : direct_map_end_(map->direct_map_.get() + map->direct_map_.get_deleter().size()),
        direct_map_size_(map->direct_map_.get_deleter().size()),
        raw_key_complement_(raw_key_complement) {}
  // Positions the iterator with a slow-map iterator as well.
  explicit IteratorImpl( std::conditional_t map ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t raw_key_complement, std::conditional_t slow_map_iter)
      : direct_map_end_(map->direct_map_.get() + map->direct_map_.get_deleter().size()),
        direct_map_size_(map->direct_map_.get_deleter().size()),
        raw_key_complement_(raw_key_complement),
        slow_map_iter_(slow_map_iter) {}

  // The end of the `direct_map_` array.
  //
  // Counting backwards simplifies computing `end()` and advancing the iterator.
  absl_nullable const std::conditional_t< is_const, const Value*, Value*>* absl_nullable direct_map_end_ = nullptr;

  // `direct_map_.get_deleter().size()`.
  size_t direct_map_size_ = 0;

  // `direct_map_size_ - raw_key` when iterating over `direct_map_`,
  // otherwise 0.
  //
  // Invariant: if `raw_key_complement_ > 0` then
  // `*(direct_map_end_ - raw_key_complement_) != nullptr`.
  //
  // Counting backwards simplifies computing `end()` and advancing the iterator.
  size_t raw_key_complement_ = 0;

  // Iterator over `*slow_map_` when `slow_map_ != nullptr`, otherwise
  // `std::nullopt`.
  //
  // Invariant: if `raw_key_complement_ > 0` and `slow_map_ != nullptr` then
  // `slow_map_iter_ == slow_map_->begin()`.
  //
  // Distinguishing `std::nullopt` instead of using the default-constructed
  // `SlowMap::iterator` makes the common case of `operator==` faster by
  // reducing usage of `SlowMap` iterators.
  std::optional> slow_map_iter_;
};

}  // namespace hybrid_direct_internal

// Implementation details follow.

namespace hybrid_direct_internal {

// Bounded by both deleters' representable sizes, scaled down so the direct
// array can stay at least 25% full.
template inline size_t HybridDirectMapImpl::max_size() {
  return UnsignedMin(SizedDeleter::max_size(), SizedDeleter, /*supports_abandon=*/true>::max_size()) / kInverseMinLoadFactor;
}

template void HybridDirectMapImpl::Reset() {
  direct_values_ = DirectValues();
  direct_map_ = DirectMap();
  slow_map_.reset();
}

template template void HybridDirectMapImpl::Initialize( Src&& src, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity) {
  using std::begin;
  using std::end;
  if constexpr (IterableHasSize::value) {
    // `src` reports its size; verify it against the iterator distance.
    using std::size;
    const size_t src_size = size(src);
    RIEGELI_ASSERT_EQ(src_size, IntCast(std::distance(begin(src), end(src))))
        << "Failed precondition of HybridDirectMap initialization: "
           "size does not match the distance between iterators";
    if (src_size > 0) {
      Optimize(begin(src), end(src), src_size, key_projection, value_projection, direct_capacity);
    }
  } else {
    // Compute the size by iterating (forward iterable, so this is allowed).
    auto first = begin(src);
    auto last = end(src);
    const size_t src_size = IntCast(std::distance(first, last));
    if (src_size > 0) {
      Optimize(first, last, src_size, key_projection, value_projection, direct_capacity);
    }
  }
#if RIEGELI_DEBUG
  // Detect building `HybridDirectMap` from a moved-from `src` if possible.
if constexpr (std::conjunction_v>, std::is_move_constructible>) { ABSL_ATTRIBUTE_UNUSED Src moved = std::forward(src); } #endif } template template void HybridDirectMapImpl::InitializeByIndex( Index size, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity) { if (size > 0) { RIEGELI_CHECK_LE(UnsignedCast(size), std::numeric_limits::max()) << "Failed precondition of HybridDirectMap initialization: " "size overflow"; // The template parameter of `Optimize()` serves only to determine whether // to apply `std::move_iterator`. Optimize(hybrid_direct_internal::IndexIterator(0), hybrid_direct_internal::IndexIterator(size), IntCast(size), key_projection, value_projection, direct_capacity); } } template template void HybridDirectMapImpl::Optimize( Iterator first, Iterator last, size_t size, const KeyProjection& key_projection, const ValueProjection& value_projection, size_t direct_capacity) { RIEGELI_ASSERT_GE(size, 0u) << "Failed precondition of HybridDirectMapImpl::Optimize(): " "an empty map must have been handled before"; RIEGELI_CHECK_LE(size, max_size()) << "Failed precondition of HybridDirectMap initialization: " "size overflow"; RawKey max_raw_key = 0; for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); max_raw_key = UnsignedMax(max_raw_key, raw_key); } const size_t max_num_direct_keys = UnsignedMax(direct_capacity, size * kInverseMinLoadFactor); size_t direct_values_index; if (max_raw_key < max_num_direct_keys) { // All keys are suitable for `direct_map_`. `slow_map_` is not used. // // There is no need for `direct_map_` to cover raw keys above `max_raw_key` // because their lookup is fast if `slow_map_` is `nullptr`. 
hybrid_direct_internal::AssignToAssumedNull( direct_values_, MakeSizedArray, /*supports_abandon=*/true>( size)); hybrid_direct_internal::AssignToAssumedNull( direct_map_, MakeSizedArray(IntCast(max_raw_key) + 1)); direct_values_index = 0; for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); if (ABSL_PREDICT_FALSE(direct_map_[raw_key] != nullptr)) continue; direct_map_[raw_key] = &direct_values_[direct_values_index++].emplace(riegeli::Invoker( value_projection, *MaybeMakeMoveIterator(iter))); } } else { // Some keys are too large for `direct_map_`. `slow_map_` is used. // // `direct_map_` covers all raw keys below `max_num_direct_keys` rather than // only up to `max_raw_key`, to reduce lookups in `slow_map_`. size_t num_direct_values = 0; for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); num_direct_values += raw_key < max_num_direct_keys ? 1 : 0; } RIEGELI_ASSERT_LT(num_direct_values, size) << "Some keys should have been too large for direct_map_"; if (ABSL_PREDICT_FALSE(num_direct_values == 0)) { // The distribution is unfortunate: all keys are too large for // `direct_map_`. No lookup hits can be optimized. Do not allocate // `direct_map_` full of absent keys to save memory, at the cost of // not optimizing any lookup misses. 
} else { hybrid_direct_internal::AssignToAssumedNull( direct_values_, MakeSizedArray, /*supports_abandon=*/true>( num_direct_values)); hybrid_direct_internal::AssignToAssumedNull( direct_map_, MakeSizedArray(max_num_direct_keys)); } hybrid_direct_internal::AssignToAssumedNull(slow_map_, std::make_unique()); slow_map_->reserve(size - num_direct_values); direct_values_index = 0; for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); if (raw_key < max_num_direct_keys) { if (ABSL_PREDICT_FALSE(direct_map_[raw_key] != nullptr)) continue; direct_map_[raw_key] = &direct_values_[direct_values_index++].emplace(riegeli::Invoker( value_projection, *MaybeMakeMoveIterator(iter))); } else { slow_map_->try_emplace( raw_key, riegeli::Invoker(value_projection, *MaybeMakeMoveIterator(iter))); } } } direct_values_.get_deleter().AbandonAfter(direct_values_.get(), direct_values_index); } template HybridDirectMapImpl::HybridDirectMapImpl( const HybridDirectMapImpl& that) noexcept : direct_values_(that.CopyDirectValues()), direct_map_(that.CopyDirectMap(direct_values_.get())), slow_map_(that.CopySlowMap()) {} template HybridDirectMapImpl& HybridDirectMapImpl::operator=( const HybridDirectMapImpl& that) noexcept { absl_nullable DirectValues new_direct_values = that.CopyDirectValues(); direct_map_ = that.CopyDirectMap(new_direct_values.get()); direct_values_ = std::move(new_direct_values); slow_map_ = that.CopySlowMap(); return *this; } template auto HybridDirectMapImpl::CopyDirectValues() const -> absl_nullable DirectValues { if (direct_values_ == nullptr) return nullptr; DirectValues dest_ptr = MakeSizedArray, /*supports_abandon=*/true>( direct_values_.get_deleter().size()); DelayedConstructor* src_iter = direct_values_.get(); DelayedConstructor* const end = dest_ptr.get() + dest_ptr.get_deleter().size(); for (DelayedConstructor* dest_iter = dest_ptr.get(); dest_iter != end; ++dest_iter) { 
dest_iter->emplace(**src_iter); ++src_iter; } return dest_ptr; } template auto HybridDirectMapImpl::CopyDirectMap( DelayedConstructor* absl_nullable dest_values) const -> absl_nullable DirectMap { if (direct_map_ == nullptr) return nullptr; DelayedConstructor* const absl_nullable src_values = direct_values_.get(); DirectMap dest_ptr = MakeSizedArrayForOverwrite( direct_map_.get_deleter().size()); Value* absl_nullable* src_iter = direct_map_.get(); Value* absl_nullable* const end = dest_ptr.get() + dest_ptr.get_deleter().size(); for (Value* absl_nullable* dest_iter = dest_ptr.get(); dest_iter != end; ++dest_iter) { *dest_iter = *src_iter == nullptr ? nullptr : reinterpret_cast(reinterpret_cast(dest_values) + ((reinterpret_cast(*src_iter) - reinterpret_cast(src_values)))); ++src_iter; } return dest_ptr; } template auto HybridDirectMapImpl::CopySlowMap() const -> absl_nullable std::unique_ptr { if (slow_map_ == nullptr) return nullptr; return std::make_unique(*slow_map_); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline Value* absl_nullable HybridDirectMapImpl::FindOrNull(Key key) ABSL_ATTRIBUTE_LIFETIME_BOUND { return const_cast(std::as_const(*this).FindOrNull(key)); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline const Value* absl_nullable HybridDirectMapImpl::FindOrNull(Key key) const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(!direct_map_.get_deleter().IsMovedFromIfNull() || direct_map_ != nullptr) << "Moved-from HybridDirectMap"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_map_.get_deleter().size()) return direct_map_[raw_key]; if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return nullptr; const auto iter = slow_map_->find(raw_key); if (iter == slow_map_->end()) return nullptr; return &iter->second; } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline const Value& HybridDirectMapImpl::FindOrDefault( Key key, const Value& default_value ABSL_ATTRIBUTE_LIFETIME_BOUND) const ABSL_ATTRIBUTE_LIFETIME_BOUND { 
RIEGELI_ASSERT(!direct_map_.get_deleter().IsMovedFromIfNull() || direct_map_ != nullptr) << "Moved-from HybridDirectMap"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_map_.get_deleter().size()) { const Value* const absl_nullable value = direct_map_[raw_key]; if (value == nullptr) return default_value; return *value; } if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return default_value; const auto iter = slow_map_->find(raw_key); if (iter == slow_map_->end()) return default_value; return iter->second; } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline auto HybridDirectMapImpl::find(Key key) ABSL_ATTRIBUTE_LIFETIME_BOUND -> iterator { RIEGELI_ASSERT(!direct_map_.get_deleter().IsMovedFromIfNull() || direct_map_ != nullptr) << "Moved-from HybridDirectMap"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_map_.get_deleter().size()) { if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) { return iterator(this, direct_map_[raw_key] == nullptr ? 0 : direct_map_.get_deleter().size() - raw_key); } if (direct_map_[raw_key] == nullptr) { return iterator(this, 0, slow_map_->end()); } return iterator(this, direct_map_.get_deleter().size() - raw_key, slow_map_->begin()); } if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return iterator(this, 0); return iterator(this, 0, slow_map_->find(raw_key)); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline auto HybridDirectMapImpl::find(Key key) const ABSL_ATTRIBUTE_LIFETIME_BOUND -> const_iterator { RIEGELI_ASSERT(!direct_map_.get_deleter().IsMovedFromIfNull() || direct_map_ != nullptr) << "Moved-from HybridDirectMap"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_map_.get_deleter().size()) { if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) { return const_iterator(this, direct_map_[raw_key] == nullptr ? 
0 : direct_map_.get_deleter().size() - raw_key); } if (direct_map_[raw_key] == nullptr) { return const_iterator(this, 0, slow_map_->cend()); } return const_iterator(this, direct_map_.get_deleter().size() - raw_key, slow_map_->cbegin()); } if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return const_iterator(this, 0); return const_iterator(this, 0, std::as_const(*slow_map_).find(raw_key)); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline bool HybridDirectMapImpl::contains(Key key) const { RIEGELI_ASSERT(!direct_map_.get_deleter().IsMovedFromIfNull() || direct_map_ != nullptr) << "Moved-from HybridDirectMap"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_map_.get_deleter().size()) { return direct_map_[raw_key] != nullptr; } if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return false; return slow_map_->contains(raw_key); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline Value& HybridDirectMapImpl::at(Key key) ABSL_ATTRIBUTE_LIFETIME_BOUND { return const_cast(std::as_const(*this).at(key)); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline const Value& HybridDirectMapImpl::at(Key key) const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT(!direct_map_.get_deleter().IsMovedFromIfNull() || direct_map_ != nullptr) << "Moved-from HybridDirectMap"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_map_.get_deleter().size()) { const Value* const absl_nullable value = direct_map_[raw_key]; if (ABSL_PREDICT_FALSE(value == nullptr)) KeyNotFound(key); return *value; } if (ABSL_PREDICT_FALSE(slow_map_ == nullptr)) KeyNotFound(key); const auto iter = slow_map_->find(raw_key); if (ABSL_PREDICT_FALSE(iter == slow_map_->end())) KeyNotFound(key); return iter->second; } template ABSL_ATTRIBUTE_NORETURN void HybridDirectMapImpl::KeyNotFound(Key key) { RIEGELI_CHECK_UNREACHABLE() << "HybridDirectMap key not found: " << riegeli::Debug(key); } template inline size_t HybridDirectMapImpl::FirstRawKey() const { const size_t direct_map_size = 
direct_map_.get_deleter().size(); for (size_t raw_key = 0; raw_key < direct_map_size; ++raw_key) { if (direct_map_[raw_key] != nullptr) return raw_key; } return direct_map_size; } template inline size_t HybridDirectMapImpl::size() const { return direct_values_.get_deleter().size() + (ABSL_PREDICT_TRUE(slow_map_ == nullptr) ? 0 : slow_map_->size()); } template inline auto HybridDirectMapImpl::begin() ABSL_ATTRIBUTE_LIFETIME_BOUND -> iterator { const size_t raw_key_complement = direct_map_.get_deleter().size() - FirstRawKey(); if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) { return iterator(this, raw_key_complement); } return iterator(this, raw_key_complement, slow_map_->begin()); } template inline auto HybridDirectMapImpl::begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND -> const_iterator { const size_t raw_key_complement = direct_map_.get_deleter().size() - FirstRawKey(); if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) { return const_iterator(this, raw_key_complement); } return const_iterator(this, raw_key_complement, slow_map_->cbegin()); } template inline auto HybridDirectMapImpl::end() ABSL_ATTRIBUTE_LIFETIME_BOUND -> iterator { if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return iterator(this, 0); return iterator(this, 0, slow_map_->end()); } template inline auto HybridDirectMapImpl::end() const ABSL_ATTRIBUTE_LIFETIME_BOUND -> const_iterator { if (ABSL_PREDICT_TRUE(slow_map_ == nullptr)) return const_iterator(this, 0); return const_iterator(this, 0, slow_map_->cend()); } template bool HybridDirectMapImpl::Equal( const HybridDirectMapImpl& a, const HybridDirectMapImpl& b) { if (a.size() != b.size()) return false; const HybridDirectMapImpl* outer; const HybridDirectMapImpl* inner; if (a.capacity() <= b.capacity()) { outer = &a; inner = &b; } else { outer = &b; inner = &a; } for (const_reference entry : *outer) { const auto* const found = inner->FindOrNull(entry.first); if (found == nullptr || *found != entry.second) return false; } return true; } } // namespace 
hybrid_direct_internal } // namespace riegeli #endif // RIEGELI_BASE_HYBRID_DIRECT_MAP_H_ ================================================ FILE: riegeli/base/hybrid_direct_set.h ================================================ // Copyright 2025 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_HYBRID_DIRECT_SET_H_ #define RIEGELI_BASE_HYBRID_DIRECT_SET_H_ #include #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "absl/base/optimization.h" #include "absl/container/flat_hash_set.h" #include "riegeli/base/arithmetic.h" #include "riegeli/base/assert.h" #include "riegeli/base/compare.h" #include "riegeli/base/hybrid_direct_common.h" // IWYU pragma: export #include "riegeli/base/hybrid_direct_internal.h" #include "riegeli/base/iterable.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `HybridDirectSet` is a set optimized for keys being mostly small integers // or enums, especially if they are dense near zero. It supports only lookups // and iteration, but no incremental modification. // // It stores a part of the set covering some range of small keys in an array // of booleans, directly indexed by the key. The remaining keys are stored in // an `absl::flat_hash_set`. // // `Traits` specifies a mapping of keys to an unsigned integer type. 
It must // support at least the following static members: // // ``` // // Translates the key to a raw key, which is an unsigned integer type. // // Small raw keys are put in the array. // static RawKey ToRawKey(Key key); // // // Translates the raw key back to a key. // // // // This is optional. Needed only for iterators. // static Key FromRawKey(RawKey raw_key); // ``` // // `direct_capacity`, if specified during building, is the intended capacity // of the array part. The actual capacity can be smaller if all keys fit // in the array, or larger if the array remains at least 25% full. Default: // `kHybridDirectDefaultDirectCapacity` (128). // // In the case of duplicate keys, one is retained. template > class HybridDirectSet : public WithEqual> { private: template struct HasCompatibleKeys : std::false_type {}; template struct HasCompatibleKeys< Src, std::enable_if_t, Key>>> : std::true_type {}; template struct HasProjectableKeys : std::false_type {}; template struct HasProjectableKeys< Src, KeyProjection, std::enable_if_t>, Key>>> : std::true_type {}; template struct HasGeneratableKeys : std::false_type {}; template struct HasGeneratableKeys< Index, KeyProjection, std::enable_if_t, Key>>> : std::true_type {}; template struct DefaultKeyProjection { Key operator()(ElementTypeT key) const { return key; } }; public: using value_type = Key; using reference = Key; using const_reference = Key; using pointer = void; using const_pointer = void; class iterator; using const_iterator = iterator; using size_type = size_t; using difference_type = ptrdiff_t; static size_t max_size(); // Constructs an empty `HybridDirectSet`. HybridDirectSet() = default; // Builds `HybridDirectSet` from an iterable `src`. 
template < typename Src, std::enable_if_t< std::conjunction_v, IsForwardIterable, HasCompatibleKeys>, int> = 0> explicit HybridDirectSet(const Src& src) { Initialize(src, DefaultKeyProjection(), kHybridDirectDefaultDirectCapacity); } template , HasCompatibleKeys>, int> = 0> explicit HybridDirectSet(const Src& src, size_t direct_capacity) { Initialize(src, DefaultKeyProjection(), direct_capacity); } // Builds `HybridDirectSet` from an initializer list. /*implicit*/ HybridDirectSet( std::initializer_list src, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { Initialize(src, DefaultKeyProjection(), direct_capacity); } // Builds `HybridDirectSet` from an iterable `src`. // // Keys are extracted using `key_projection()`. `key_projection()` may be // called multiple times for each key so it should be efficient. template < typename Src, typename KeyProjection = DefaultKeyProjection, std::enable_if_t< std::conjunction_v< std::negation>, IsForwardIterable, HasProjectableKeys>, int> = 0> explicit HybridDirectSet( const Src& src, const KeyProjection& key_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { Initialize(src, key_projection, direct_capacity); } // Builds `HybridDirectSet` from keys computed by invoking `key_projection()` // with indices from [0..`size`). // // `key_projection()` may be called multiple times for each index so it should // be efficient. template , std::negation>, HasGeneratableKeys>, int> = 0> explicit HybridDirectSet( Index size, const KeyProjection& key_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { InitializeByIndex(size, key_projection, direct_capacity); } HybridDirectSet(const HybridDirectSet& that) noexcept; HybridDirectSet& operator=(const HybridDirectSet& that) noexcept; HybridDirectSet(HybridDirectSet&& that) = default; HybridDirectSet& operator=(HybridDirectSet&& that) = default; // Makes `*this` equivalent to a newly constructed `HybridDirectSet`. 
ABSL_ATTRIBUTE_REINITIALIZES void Reset(); template , HasCompatibleKeys>, int> = 0> ABSL_ATTRIBUTE_REINITIALIZES void Reset( const Src& src, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { Reset(); Initialize(src, DefaultKeyProjection(), direct_capacity); } ABSL_ATTRIBUTE_REINITIALIZES void Reset( std::initializer_list src, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { Reset(); Initialize(src, DefaultKeyProjection(), direct_capacity); } template < typename Src, typename KeyProjection = DefaultKeyProjection, std::enable_if_t< std::conjunction_v< std::negation>, IsForwardIterable, HasProjectableKeys>, int> = 0> ABSL_ATTRIBUTE_REINITIALIZES void Reset( const Src& src, const KeyProjection& key_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { Reset(); Initialize(src, key_projection, direct_capacity); } template , std::negation>, HasGeneratableKeys>, int> = 0> ABSL_ATTRIBUTE_REINITIALIZES void Reset( Index size, const KeyProjection& key_projection, size_t direct_capacity = kHybridDirectDefaultDirectCapacity) { Reset(); InitializeByIndex(size, key_projection, direct_capacity); } bool contains(Key key) const; bool empty() const { return size_ == 0; } size_t size() const { return size_; } iterator begin() ABSL_ATTRIBUTE_LIFETIME_BOUND; const_iterator begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND; const_iterator cbegin() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return begin(); } iterator end() ABSL_ATTRIBUTE_LIFETIME_BOUND; const_iterator end() const ABSL_ATTRIBUTE_LIFETIME_BOUND; const_iterator cend() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return end(); } friend bool operator==(const HybridDirectSet& a, const HybridDirectSet& b) { return Equal(a, b); } private: using RawKey = std::decay_t()))>; static_assert(std::is_unsigned_v); using DirectSet = hybrid_direct_internal::SizedArray; using SlowSet = absl::flat_hash_set; static constexpr int kInverseMinLoadFactor = 4; // 25%. 
template void Initialize(const Src& src, const KeyProjection& key_projection, size_t direct_capacity); template void InitializeByIndex(Index size, const KeyProjection& key_projection, size_t direct_capacity); template void Optimize(Iterator first, Iterator last, size_t size, const KeyProjection& key_projection, size_t direct_capacity); absl_nullable DirectSet CopyDirectSet() const; absl_nullable std::unique_ptr CopySlowSet() const; size_t FirstRawKey() const; size_t capacity() const { return direct_set_.get_deleter().size() + (slow_set_ == nullptr ? 0 : slow_set_->capacity()); } static bool Equal(const HybridDirectSet& a, const HybridDirectSet& b); // Indexed by raw key below `direct_set_.get_deleter().size()`. absl_nullable DirectSet direct_set_; // If not `nullptr`, stores the set of keys too large for `direct_set_`. // Uses `std::unique_ptr` rather than `std::optional` to reduce memory usage // in the common case when `slow_set_` is not used. // // Invariant: if `slow_set_ != nullptr` then `!slow_set_->empty()`. absl_nullable std::unique_ptr slow_set_; size_t size_ = 0; }; template class HybridDirectSet::iterator : public WithEqual { public: using iterator_concept = std::forward_iterator_tag; // `iterator_category` is only `std::input_iterator_tag` because the // `LegacyForwardIterator` requirement and above require `reference` to be // a true reference type. 
using iterator_category = std::input_iterator_tag; using value_type = Key; using reference = Key; using pointer = void; using difference_type = ptrdiff_t; iterator() = default; iterator(const iterator& that) = default; iterator& operator=(const iterator& that) = default; reference operator*() const { if (ABSL_PREDICT_TRUE(raw_key_complement_ > 0)) { return Traits::FromRawKey( IntCast(direct_set_size_ - raw_key_complement_)); } return Traits::FromRawKey(**slow_set_iter_); } iterator& operator++() { if (ABSL_PREDICT_TRUE(raw_key_complement_ > 0)) { do { --raw_key_complement_; if (ABSL_PREDICT_FALSE(raw_key_complement_ == 0)) break; } while (!*(direct_set_end_ - raw_key_complement_)); } else { ++*slow_set_iter_; } return *this; } iterator operator++(int) { iterator result = *this; ++*this; return result; } friend bool operator==(iterator a, iterator b) { RIEGELI_ASSERT_EQ(a.direct_set_end_, b.direct_set_end_) << "Failed precondition of operator==(HybridDirectSet::iterator): " "incomparable iterators"; RIEGELI_ASSERT_EQ(a.direct_set_size_, b.direct_set_size_) << "Failed precondition of operator==(HybridDirectSet::iterator): " "incomparable iterators"; RIEGELI_ASSERT_EQ(a.slow_set_iter_ != std::nullopt, b.slow_set_iter_ != std::nullopt) << "Failed precondition of operator==(HybridDirectSet::iterator): " "incomparable iterators"; if (a.raw_key_complement_ != b.raw_key_complement_) return false; if (ABSL_PREDICT_TRUE(a.slow_set_iter_ == std::nullopt)) return true; return *a.slow_set_iter_ == *b.slow_set_iter_; } private: friend class HybridDirectSet; explicit iterator(const HybridDirectSet* set ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t raw_key_complement) : direct_set_end_(set->direct_set_.get() + set->direct_set_.get_deleter().size()), direct_set_size_(set->direct_set_.get_deleter().size()), raw_key_complement_(raw_key_complement) {} explicit iterator(const HybridDirectSet* set ABSL_ATTRIBUTE_LIFETIME_BOUND, size_t raw_key_complement, typename SlowSet::const_iterator 
slow_set_iter) : direct_set_end_(set->direct_set_.get() + set->direct_set_.get_deleter().size()), direct_set_size_(set->direct_set_.get_deleter().size()), raw_key_complement_(raw_key_complement), slow_set_iter_(slow_set_iter) {} // The end of the `direct_set_` array. // // Counting backwards simplifies checking for iteration over `direct_set_`. const bool* absl_nullable direct_set_end_ = nullptr; // `direct_set_.get_deleter().size()`. size_t direct_set_size_ = 0; // `direct_set_size_ - raw_key` when iterating over `direct_set_`, // otherwise 0. // // Invariant: if `raw_key_complement_ > 0` then // `*(direct_set_end_ - raw_key_complement_) != nullptr`. // // Counting backwards simplifies computing `end()` and advancing the iterator. size_t raw_key_complement_ = 0; // Iterator over `*slow_set_` when `slow_set_ != nullptr`, otherwise // `std::nullopt`. // // Invariant: if `raw_key_complement_ > 0` and `slow_set_ != nullptr` then // `slow_set_iter_ == slow_set_->begin()`. // // Distinguishing `std::nullopt` instead of using the default-constructed // `SlowSet::iterator` makes the common case of `operator==` faster by // reducing usage of `SlowSet` iterators. std::optional slow_set_iter_; }; // Implementation details follow. 
template inline size_t HybridDirectSet::max_size() { return hybrid_direct_internal::SizedDeleter::max_size() / kInverseMinLoadFactor; } template HybridDirectSet::HybridDirectSet( const HybridDirectSet& that) noexcept : direct_set_(that.CopyDirectSet()), slow_set_(that.CopySlowSet()), size_(that.size_) {} template HybridDirectSet& HybridDirectSet::operator=( const HybridDirectSet& that) noexcept { direct_set_ = that.CopyDirectSet(); slow_set_ = that.CopySlowSet(); size_ = that.size_; return *this; } template void HybridDirectSet::Reset() { direct_set_ = DirectSet(); slow_set_.reset(); size_ = 0; } template template void HybridDirectSet::Initialize( const Src& src, const KeyProjection& key_projection, size_t direct_capacity) { using std::begin; using std::end; if constexpr (IterableHasSize::value) { using std::size; const size_t src_size = size(src); RIEGELI_ASSERT_EQ(src_size, IntCast(std::distance(begin(src), end(src)))) << "Failed precondition of HybridDirectSet initialization: " "size does not match the distance between iterators"; if (src_size > 0) { Optimize(begin(src), end(src), src_size, key_projection, direct_capacity); } } else { auto first = begin(src); auto last = end(src); const size_t src_size = IntCast(std::distance(first, last)); if (src_size > 0) Optimize(first, last, src_size, key_projection, direct_capacity); } } template template void HybridDirectSet::InitializeByIndex( Index size, const KeyProjection& key_projection, size_t direct_capacity) { if (size > 0) { RIEGELI_CHECK_LE(UnsignedCast(size), std::numeric_limits::max()) << "Failed precondition of HybridDirectSet initialization: " "size overflow"; Optimize(hybrid_direct_internal::IndexIterator(0), hybrid_direct_internal::IndexIterator(size), IntCast(size), key_projection, direct_capacity); } } template template void HybridDirectSet::Optimize(Iterator first, Iterator last, size_t size, const KeyProjection& key_projection, size_t direct_capacity) { RIEGELI_ASSERT_GE(size, 0u) << "Failed 
precondition of HybridDirectSet::Optimize(): " "an empty map must have been handled before"; RIEGELI_CHECK_LE(size, max_size()) << "Failed precondition of HybridDirectSet initialization: " "size overflow"; RawKey max_raw_key = 0; for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); max_raw_key = UnsignedMax(max_raw_key, raw_key); } const size_t max_num_direct_keys = UnsignedMax(direct_capacity, size * kInverseMinLoadFactor); size_ = size; if (max_raw_key < max_num_direct_keys) { // All keys are suitable for `direct_set_`. `slow_set_` is not used. // // There is no need for `direct_set_` to cover raw keys above `max_raw_key` // because their lookup is fast if `slow_set_` is `nullptr`. hybrid_direct_internal::AssignToAssumedNull( direct_set_, hybrid_direct_internal::MakeSizedArray( IntCast(max_raw_key) + 1)); for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); if (ABSL_PREDICT_FALSE(direct_set_[raw_key])) --size_; direct_set_[raw_key] = true; } } else { // Some keys are too large for `direct_set_`. `slow_set_` is used. // // `direct_set_` covers all raw keys below `max_num_direct_keys` rather than // only up to `max_raw_key`, to reduce lookups in `slow_set_`. size_t num_direct_elements = 0; for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); num_direct_elements += raw_key < max_num_direct_keys ? 1 : 0; } RIEGELI_ASSERT_LT(num_direct_elements, size) << "Some keys should have been too large for direct_set_"; if (ABSL_PREDICT_FALSE(num_direct_elements == 0)) { // The distribution is unfortunate: all keys are too large for // `direct_set_`. No lookup hits can be optimized. Do not allocate // `direct_set_` full of absent keys to save memory, at the cost of // not optimizing any lookup misses. 
} else { hybrid_direct_internal::AssignToAssumedNull( direct_set_, hybrid_direct_internal::MakeSizedArray(max_num_direct_keys)); } hybrid_direct_internal::AssignToAssumedNull(slow_set_, std::make_unique()); slow_set_->reserve(size - num_direct_elements); for (auto iter = first; iter != last; ++iter) { const RawKey raw_key = Traits::ToRawKey(std::invoke(key_projection, *iter)); if (raw_key < max_num_direct_keys) { if (ABSL_PREDICT_FALSE(direct_set_[raw_key])) --size_; direct_set_[raw_key] = true; } else { const auto inserted = slow_set_->insert(raw_key); if (ABSL_PREDICT_FALSE(!inserted.second)) --size_; } } } } template auto HybridDirectSet::CopyDirectSet() const -> absl_nullable DirectSet { if (direct_set_ == nullptr) return nullptr; DirectSet dest_ptr = hybrid_direct_internal::MakeSizedArrayForOverwrite( direct_set_.get_deleter().size()); std::memcpy(dest_ptr.get(), direct_set_.get(), dest_ptr.get_deleter().size() * sizeof(bool)); return dest_ptr; } template auto HybridDirectSet::CopySlowSet() const -> absl_nullable std::unique_ptr { if (slow_set_ == nullptr) return nullptr; return std::make_unique(*slow_set_); } template ABSL_ATTRIBUTE_ALWAYS_INLINE inline bool HybridDirectSet::contains( Key key) const { RIEGELI_ASSERT(!direct_set_.get_deleter().IsMovedFromIfNull() || direct_set_ != nullptr) << "Moved-from HybridDirectSet"; const RawKey raw_key = Traits::ToRawKey(key); if (raw_key < direct_set_.get_deleter().size()) return direct_set_[raw_key]; if (ABSL_PREDICT_TRUE(slow_set_ == nullptr)) return false; return slow_set_->contains(raw_key); } template inline size_t HybridDirectSet::FirstRawKey() const { const size_t direct_set_size = direct_set_.get_deleter().size(); for (size_t raw_key = 0; raw_key < direct_set_size; ++raw_key) { if (direct_set_[raw_key]) return raw_key; } return direct_set_size; } template inline auto HybridDirectSet::begin() ABSL_ATTRIBUTE_LIFETIME_BOUND -> iterator { const size_t raw_key_complement = direct_set_.get_deleter().size() - 
FirstRawKey(); if (ABSL_PREDICT_TRUE(slow_set_ == nullptr)) { return iterator(this, raw_key_complement); } return iterator(this, raw_key_complement, slow_set_->begin()); } template inline auto HybridDirectSet::begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND -> const_iterator { const size_t raw_key_complement = direct_set_.get_deleter().size() - FirstRawKey(); if (ABSL_PREDICT_TRUE(slow_set_ == nullptr)) { return const_iterator(this, raw_key_complement); } return const_iterator(this, raw_key_complement, slow_set_->cbegin()); } template inline auto HybridDirectSet::end() ABSL_ATTRIBUTE_LIFETIME_BOUND -> iterator { if (ABSL_PREDICT_TRUE(slow_set_ == nullptr)) return iterator(this, 0); return iterator(this, 0, slow_set_->end()); } template inline auto HybridDirectSet::end() const ABSL_ATTRIBUTE_LIFETIME_BOUND -> const_iterator { if (ABSL_PREDICT_TRUE(slow_set_ == nullptr)) return const_iterator(this, 0); return const_iterator(this, 0, slow_set_->cend()); } template bool HybridDirectSet::Equal(const HybridDirectSet& a, const HybridDirectSet& b) { if (a.size() != b.size()) return false; const HybridDirectSet* outer; const HybridDirectSet* inner; if (a.capacity() <= b.capacity()) { outer = &a; inner = &b; } else { outer = &b; inner = &a; } for (Key key : *outer) { if (!inner->contains(key)) return false; } return true; } } // namespace riegeli #endif // RIEGELI_BASE_HYBRID_DIRECT_SET_H_ ================================================ FILE: riegeli/base/initializer.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_INITIALIZER_H_ #define RIEGELI_BASE_INITIALIZER_H_ #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/initializer_internal.h" #include "riegeli/base/invoker.h" #include "riegeli/base/maker.h" #include "riegeli/base/reset.h" #include "riegeli/base/temporary_storage.h" #include "riegeli/base/type_erased_ref.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { template class Initializer; namespace initializer_internal { // `IsInitializer` detects `Initializer` types with the given target type. template struct IsInitializer : std::false_type {}; template struct IsInitializer> : std::true_type {}; // Part of `Initializer` for `T` being a non-reference type. template class InitializerBase { public: // Constructs the `T`. /*implicit*/ operator T() && { return std::move(*this).Construct(); } // Constructs the `T`. // // Usually conversion to `T` is preferred because it can avoid creating a // temporary if the context accepts an arbitrary type convertible to `T` and // it leads to simpler source code. An explicit `Construct()` call can force // construction right away while avoiding specifying the full target type. T Construct() && { return methods()->construct(context()); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. // // For a non-default-constructed deleter, use `UniquePtr(deleter)`. 
template *, Target*>, int> = 0> /*implicit*/ operator std::unique_ptr() && { return std::move(*this).template UniquePtr(); } template *, Target*>, int> = 0> /*implicit*/ operator std::unique_ptr() const& { return UniquePtr(); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. // // Usually conversion to `std::unique_ptr` is preferred because it leads to // simpler source code. An explicit `UniquePtr()` call can force construction // right away while avoiding writing the full target type, and it allows to // use a non-default-constructed deleter. template >> std::unique_ptr, Deleter> UniquePtr() && { return std::unique_ptr, Deleter>( new std::decay_t(std::move(*this))); } template std::unique_ptr, Deleter> UniquePtr(Deleter&& deleter) && { return std::unique_ptr, Deleter>( new std::decay_t(std::move(*this)), std::forward(deleter)); } // Constructs the `T` in `storage` which must outlive the returned reference, // or returns a reference to an already constructed object if a compatible // object was passed to `Initializer` constructor. // // `Reference()` instead of conversion to `T` or `Construct()` can avoid // moving the object if the caller does not need to store the object, or if it // will be moved later because the target location for the object is not ready // yet. // // `storage` must outlive usages of the returned reference. 
T&& Reference( TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND = {}) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return methods()->reference(context(), std::move(storage)); } private: static T ConstructMethodDefault(TypeErasedRef context); template static T ConstructMethodFromObject(TypeErasedRef context); template static T ConstructMethodFromMaker(TypeErasedRef context); template static T ConstructMethodFromConstMaker(TypeErasedRef context); template static T ConstructMethodFromConvertedReference(TypeErasedRef context); static T&& ReferenceMethodDefault(TypeErasedRef context, TemporaryStorage&& storage); template static T&& ReferenceMethodFromObject(TypeErasedRef context, TemporaryStorage&& storage); template static T&& ReferenceMethodFromMaker(TypeErasedRef context, TemporaryStorage&& storage); template static T&& ReferenceMethodFromConstMaker(TypeErasedRef context, TemporaryStorage&& storage); template static T&& ReferenceMethodFromConvertedReference( TypeErasedRef context, TemporaryStorage&& storage); protected: struct Methods { T (*construct)(TypeErasedRef context); T && (*reference)(TypeErasedRef context, TemporaryStorage&& storage); }; explicit InitializerBase(const Methods* methods); template explicit InitializerBase(const Methods* methods, Arg&& arg); InitializerBase(InitializerBase&& that) = default; InitializerBase& operator=(InitializerBase&&) = delete; template static constexpr Methods kMethodsDefault = {ConstructMethodDefault, ReferenceMethodDefault}; template static constexpr Methods kMethodsFromObject = { ConstructMethodFromObject, ReferenceMethodFromObject}; template static constexpr Methods kMethodsFromMaker = { ConstructMethodFromMaker, ReferenceMethodFromMaker}; template static constexpr Methods kMethodsFromConstMaker = { ConstructMethodFromConstMaker, ReferenceMethodFromConstMaker}; template static constexpr Methods kMethodsFromConvertedReference = { ConstructMethodFromConvertedReference, ReferenceMethodFromConvertedReference}; const Methods* 
methods() const { return methods_; } TypeErasedRef context() const { return context_; } private: const Methods* methods_; TypeErasedRef context_; }; // Part of `Initializer` for `T` being a move-assignable non-reference type. template class InitializerAssignableBase : public InitializerBase { public: // `riegeli::Reset(dest, Initializer)` makes `dest` equivalent to the // constructed `T`. This avoids constructing a temporary `T` and moving from // it. friend void RiegeliReset(T& dest, InitializerAssignableBase&& src) { src.methods()->reset(src.context(), dest); } private: static void ResetMethodDefault(TypeErasedRef context, T& dest); template static void ResetMethodFromObject(TypeErasedRef context, T& dest); template static void ResetMethodFromMaker(TypeErasedRef context, T& dest); template static void ResetMethodFromConstMaker(TypeErasedRef context, T& dest); template static void ResetMethodFromConvertedReference(TypeErasedRef context, T& dest); protected: struct Methods : InitializerAssignableBase::InitializerBase::Methods { void (*reset)(TypeErasedRef context, T& dest); }; template static constexpr Methods kMethodsDefault = { InitializerAssignableBase::InitializerBase::template kMethodsDefault<>, ResetMethodDefault}; template static constexpr Methods kMethodsFromObject = { InitializerAssignableBase::InitializerBase::template kMethodsFromObject< Arg>, ResetMethodFromObject}; template static constexpr Methods kMethodsFromMaker = { InitializerAssignableBase::InitializerBase::template kMethodsFromMaker< Args...>, ResetMethodFromMaker}; template static constexpr Methods kMethodsFromConstMaker = { InitializerAssignableBase::InitializerBase:: template kMethodsFromConstMaker, ResetMethodFromConstMaker}; template static constexpr Methods kMethodsFromConvertedReference = { InitializerAssignableBase::InitializerBase:: template kMethodsFromConvertedReference, ResetMethodFromConvertedReference}; explicit InitializerAssignableBase(const Methods* methods) : 
InitializerAssignableBase::InitializerBase(methods) {} template explicit InitializerAssignableBase(const Methods* methods, Arg&& arg) : InitializerAssignableBase::InitializerBase(methods, std::forward(arg)) {} InitializerAssignableBase(InitializerAssignableBase&& that) = default; InitializerAssignableBase& operator=(InitializerAssignableBase&&) = delete; const Methods* methods() const { return static_cast( InitializerAssignableBase::InitializerBase::methods()); } }; // Part of `Initializer` for `T` being a reference type. template class InitializerReference { public: // Constructs the `T`. /*implicit*/ operator T() && { return std::move(*this).Construct(); } // Constructs the `T`. // // Usually conversion to `T` is preferred because it leads to simpler source // code. An explicit `Construct()` call can force construction right away // while avoiding specifying the full target type. T Construct() && { return methods()->construct(context()); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. // // For a non-default-constructed deleter, use `UniquePtr(deleter)`. template *, Target*>, int> = 0> /*implicit*/ operator std::unique_ptr() && { return std::move(*this).template UniquePtr(); } template *, Target*>, int> = 0> /*implicit*/ operator std::unique_ptr() const& { return UniquePtr(); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. // // Usually conversion to `std::unique_ptr` is preferred because it leads to // simpler source code. An explicit `UniquePtr()` call can force construction // right away while avoiding writing the full target type, and it allows to // use a non-default-constructed deleter. 
template >, typename DependentT = T, std::enable_if_t< std::is_constructible_v, DependentT>, int> = 0> std::unique_ptr, Deleter> UniquePtr() && { return std::unique_ptr, Deleter>( new std::decay_t(std::move(*this))); } template , DependentT>, int> = 0> std::unique_ptr, Deleter> UniquePtr(Deleter&& deleter) && { return std::unique_ptr, Deleter>( new std::decay_t(std::move(*this)), std::forward(deleter)); } // `Reference()` can be defined in terms of conversion to `T` because // reference storage is never used for reference types. // // Unused `storage` parameter makes the signature compatible with the // non-reference specialization. T&& Reference() && ABSL_ATTRIBUTE_LIFETIME_BOUND { // `T` is a reference type here, so `T&&` is the same as `T`. return std::move(*this).Construct(); } T&& Reference(ABSL_ATTRIBUTE_UNUSED TemporaryStorage&& storage) && ABSL_ATTRIBUTE_LIFETIME_BOUND { return std::move(*this).Reference(); } private: template static T ConstructMethodFromObject(TypeErasedRef context); template static T ConstructMethodFromMaker(TypeErasedRef context); template static T ConstructMethodFromConstMaker(TypeErasedRef context); template static T ConstructMethodFromConvertedReference(TypeErasedRef context); protected: struct Methods { T (*construct)(TypeErasedRef context); }; explicit InitializerReference(const Methods* methods); template explicit InitializerReference(const Methods* methods, Arg&& arg); InitializerReference(InitializerReference&& that) = default; InitializerReference& operator=(InitializerReference&&) = delete; template static constexpr Methods kMethodsFromObject = { ConstructMethodFromObject}; template static constexpr Methods kMethodsFromMaker = { ConstructMethodFromMaker}; template static constexpr Methods kMethodsFromConstMaker = { ConstructMethodFromConstMaker}; template static constexpr Methods kMethodsFromConvertedReference = { ConstructMethodFromConvertedReference}; const Methods* methods() const { return methods_; } TypeErasedRef context() 
const { return context_; } private: const Methods* methods_; TypeErasedRef context_; }; template struct InitializerImpl; template struct InitializerImpl>> { using type = InitializerBase; }; template struct InitializerImpl< T, std::enable_if_t>, std::is_convertible, std::negation>>>> { using type = InitializerBase; }; template struct InitializerImpl< T, std::enable_if_t>, std::is_convertible, std::is_move_assignable>>> { using type = InitializerAssignableBase; }; template struct InitializerImpl>> { using type = InitializerReference; }; } // namespace initializer_internal // A parameter of type `Initializer` allows the caller to specify a `T` by // passing a value convertible to `T`, or constructor arguments for `T` packed // in `riegeli::Maker(args...)` or `riegeli::Maker(args...)`. // // In contrast to accepting `T` directly, this allows to construct the object // in-place, avoiding constructing a temporary and moving from it. This also // avoids separate overloads for `const T&` and `T&&` or a template. // // `Initializer(arg)` does not own `arg`, even if it involves temporaries, // hence it should be used only as a parameter of a function or constructor, // so that the temporaries outlive its usage. Instead of storing an // `Initializer` in a variable or returning it from a function, consider // `riegeli::OwningMaker(args...)`, `MakerTypeFor`, or `T`. template class ABSL_NULLABILITY_COMPATIBLE Initializer : public initializer_internal::InitializerImpl::type { private: using Base = typename initializer_internal::InitializerImpl::type; public: // Constructs `Initializer` which specifies `T()`. template < typename DependentT = T, std::enable_if_t, int> = 0> Initializer() : Base(&Base::template kMethodsDefault<>) {} // Constructs `Initializer` from a value convertible to `T`. 
template < typename Arg, std::enable_if_t< std::conjunction_v>>, std::is_convertible>, int> = 0> /*implicit*/ Initializer(Arg&& arg ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromObject, std::forward(arg)) {} // Constructs `Initializer` from `std::reference_wrapper` with a // compatible `Arg`. template , std::is_convertible*>>, int> = 0> /*implicit*/ Initializer(std::reference_wrapper arg) : Base(&Base::template kMethodsFromObject, arg.get()) {} // Constructs `Initializer` from constructor arguments for `T` packed in // `riegeli::Maker(args...)`. // // Prefer `Template(riegeli::Maker(args...))` over // `Template(riegeli::Maker(args...))` if CTAD for `Template` can be used. template , int> = 0> /*implicit*/ Initializer( MakerType&& args ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromMaker, std::move(args)) {} template < typename... Args, std::enable_if_t, int> = 0> /*implicit*/ Initializer( const MakerType& args ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromConstMaker, args) {} // Constructs `Initializer` from constructor arguments for `T` packed in // `riegeli::Maker(args...)`. template < typename... Args, std::enable_if_t&&, T>>, std::is_constructible>, int> = 0> /*implicit*/ Initializer( MakerTypeFor&& args ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromMaker, std::move(args).maker()) {} template &, T>>, std::is_constructible>, int> = 0> /*implicit*/ Initializer( const MakerTypeFor& args ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromConstMaker, args.maker()) {} // Constructs `Initializer` from constructor arguments for `T` packed in // `riegeli::Maker(args...)` with a different but compatible `Target`. 
template >, std::negation&&, T>>, std::is_constructible, IsConvertibleFromResult>, int> = 0> /*implicit*/ Initializer( MakerTypeFor&& args ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromConvertedReference< MakerTypeFor>, std::move(args)) {} template >, std::negation&, T>>, std::is_constructible, IsConvertibleFromResult>, int> = 0> /*implicit*/ Initializer( const MakerTypeFor& args ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromConvertedReference< const MakerTypeFor&>, args) {} // Constructs `Initializer` from a factory function for `T` packed in // `riegeli::Invoker(function, args...)` with a possibly different but // compatible function result. template < typename Function, typename... Args, std::enable_if_t&&, T>>, IsConvertibleFromResult< T, std::invoke_result_t>>, int> = 0> /*implicit*/ Initializer( InvokerType&& invoker ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromObject>, std::move(invoker)) {} template < typename Function, typename... Args, std::enable_if_t< std::conjunction_v< std::negation&, T>>, IsConvertibleFromResult< T, std::invoke_result_t>>, int> = 0> /*implicit*/ Initializer(const InvokerType& invoker ABSL_ATTRIBUTE_LIFETIME_BOUND) : Base(&Base::template kMethodsFromObject< const InvokerType&>, invoker) {} // Constructs `Initializer` from `Initializer` with a different but // compatible `Target`. template < typename Target, std::enable_if_t< std::conjunction_v< std::negation>, std::negation&&, T>>, IsConvertibleFromResult>, int> = 0> /*implicit*/ Initializer(Initializer&& initializer) : Base( &Base::template kMethodsFromConvertedReference>, std::move(initializer)) {} Initializer(Initializer&& that) = default; Initializer& operator=(Initializer&&) = delete; // For `ABSL_NULLABILITY_COMPATIBLE`. using pointer = std::conditional_t, T, void*>; }; // `Target::type` and `TargetT` deduce the appropriate target type such // that `T` is convertible to `Initializer>`. 
// // This allows a single template to uniformly handle a `Target` passed directly, // as `riegeli::Maker(args...)`, as // `riegeli::Invoker(function, args...)`, or as `Initializer`. This is // also useful for CTAD guides to deduce a template argument as `TargetT`. // // They are undefined in the case of `riegeli::Maker(args...)` which requires // the target type to be specified by the caller, or when the object is not // usable in the given const and reference context. namespace initializer_internal { template struct TargetImpl { using type = Value; }; template struct TargetImpl, Reference> { using type = T&; }; template struct TargetImpl, Reference> { // No `type` member when the target type is unspecified. }; template struct TargetImpl, Reference> : MakerTarget {}; template struct TargetImpl, Reference> : InvokerTarget {}; template struct TargetImpl, Reference> { using type = T; }; }; // namespace initializer_internal template struct Target : initializer_internal::TargetImpl, T&&> {}; template using TargetT = typename Target::type; // `TargetRef::type` and `TargetRefT` are like `TargetT`, but if the // object is already constructed, then they are the corresponding reference type // instead of the value type. It is still true that `T` is convertible to // `Initializer>`. // // This allows to avoid moving or copying the object if a reference to it is // sufficient. namespace initializer_internal { template struct TargetRefImpl { using type = Reference; }; template struct TargetRefImpl, Reference> { using type = T&; }; template struct TargetRefImpl, Reference> { // No `type` member when the target type is unspecified. 
}; template struct TargetRefImpl, Reference> : MakerTarget {}; template struct TargetRefImpl, Reference> : InvokerTargetRef {}; template struct TargetRefImpl, Reference> { using type = T; }; }; // namespace initializer_internal template struct TargetRef : initializer_internal::TargetRefImpl, T&&> {}; template using TargetRefT = typename TargetRef::type; // Implementation details follow. namespace initializer_internal { template inline InitializerBase::InitializerBase(const Methods* methods) : methods_(methods) {} template template inline InitializerBase::InitializerBase(const Methods* methods, Arg&& arg) : methods_(methods), context_(std::forward(arg)) {} template T InitializerBase::ConstructMethodDefault( ABSL_ATTRIBUTE_UNUSED TypeErasedRef context) { return T(); } template template T InitializerBase::ConstructMethodFromObject(TypeErasedRef context) { return T(context.Cast()); } template template T InitializerBase::ConstructMethodFromMaker(TypeErasedRef context) { return context.Cast>().template Construct(); } template template T InitializerBase::ConstructMethodFromConstMaker(TypeErasedRef context) { return context.Cast&>().template Construct(); } template template T InitializerBase::ConstructMethodFromConvertedReference( TypeErasedRef context) { return T(context.Cast().Reference()); } template T&& InitializerBase::ReferenceMethodDefault( ABSL_ATTRIBUTE_UNUSED TypeErasedRef context, TemporaryStorage&& storage) { return std::move(storage).emplace(); } template template T&& InitializerBase::ReferenceMethodFromObject( TypeErasedRef context, TemporaryStorage&& storage) { if constexpr (CanBindReference::value) { return BindReference(context.Cast()); } else { return std::move(storage).emplace(context.Cast()); } } template template T&& InitializerBase::ReferenceMethodFromMaker( TypeErasedRef context, TemporaryStorage&& storage) { return context.Cast>().template Reference( std::move(storage)); } template template T&& InitializerBase::ReferenceMethodFromConstMaker( 
TypeErasedRef context, TemporaryStorage&& storage) { return context.Cast&>().template Reference( std::move(storage)); } template template T&& InitializerBase::ReferenceMethodFromConvertedReference( TypeErasedRef context, TemporaryStorage&& storage) { return std::move(storage).emplace(context.Cast().Reference()); } template void InitializerAssignableBase::ResetMethodDefault( ABSL_ATTRIBUTE_UNUSED TypeErasedRef context, T& dest) { riegeli::Reset(dest); } template template void InitializerAssignableBase::ResetMethodFromObject(TypeErasedRef context, T& dest) { riegeli::Reset(dest, context.Cast()); } template template void InitializerAssignableBase::ResetMethodFromMaker(TypeErasedRef context, T& dest) { riegeli::Reset(dest, context.Cast>()); } template template void InitializerAssignableBase::ResetMethodFromConstMaker( TypeErasedRef context, T& dest) { riegeli::Reset(dest, context.Cast&>()); } template template void InitializerAssignableBase::ResetMethodFromConvertedReference( TypeErasedRef context, T& dest) { riegeli::Reset(dest, context.Cast().Reference()); } template inline InitializerReference::InitializerReference(const Methods* methods) : methods_(methods) {} template template inline InitializerReference::InitializerReference(const Methods* methods, Arg&& arg) : methods_(methods), context_(std::forward(arg)) {} template template T InitializerReference::ConstructMethodFromObject(TypeErasedRef context) { return T(context.Cast()); } template template T InitializerReference::ConstructMethodFromMaker(TypeErasedRef context) { return context.Cast>().template Construct(); } template template T InitializerReference::ConstructMethodFromConstMaker( TypeErasedRef context) { return context.Cast&>().template Construct(); } template template T InitializerReference::ConstructMethodFromConvertedReference( TypeErasedRef context) { return T(context.Cast().Reference()); } } // namespace initializer_internal } // namespace riegeli #endif // RIEGELI_BASE_INITIALIZER_H_ 
================================================ FILE: riegeli/base/initializer_internal.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #ifndef RIEGELI_BASE_INITIALIZER_INTERNAL_H_ #define RIEGELI_BASE_INITIALIZER_INTERNAL_H_ #include #include #include #include "absl/base/casts.h" #include "absl/base/nullability.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli::initializer_internal { // `CanBindReference::value` is `true` if `Arg&&` can be implicitly // converted to `T&&` without creating a temporary. // // Due to not all compilers implementing http://wg21.link/cwg2352 (converting // `T*&` to `const T* const&` could have bound the result to a temporary), // this covers also the case when the corresponding pointers can be converted. // `BindReference()` should be used for the actual conversion. template struct CanBindReference : std::false_type {}; template struct CanBindReference : std::is_convertible {}; template struct CanBindReference : std::false_type {}; template struct CanBindReference : std::is_convertible { }; template struct CanBindReference : std::false_type {}; template struct CanBindReference : std::is_convertible {}; // `BindReference(arg)` returns `arg` implicitly converted to `T&&`. 
// // Due to not all compilers implementing http://wg21.link/cwg2352 (converting // `T*&` to `const T* const&` could have bound the result to a temporary), // this is not implemented as a simple implicit conversion, but by converting // the reference to a pointer, implicitly converting the pointer, and // dereferencing back. template ::value, int> = 0> inline T&& BindReference(Arg&& arg) { return std::forward( *absl::implicit_cast*>(&arg)); } } // namespace riegeli::initializer_internal #endif // RIEGELI_BASE_INITIALIZER_INTERNAL_H_ ================================================ FILE: riegeli/base/intrusive_shared_ptr.h ================================================ // Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_INTRUSIVE_SHARED_PTR_H_ #define RIEGELI_BASE_INTRUSIVE_SHARED_PTR_H_ #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/assert.h" #include "riegeli/base/compare.h" #include "riegeli/base/external_data.h" #include "riegeli/base/initializer.h" #include "riegeli/base/ownership.h" namespace riegeli { namespace intrusive_shared_ptr_internal { template struct HasHasUniqueOwner : std::false_type {}; template struct HasHasUniqueOwner< T, std::enable_if_t().HasUniqueOwner()), bool>>> : std::true_type {}; template struct HasGetCount : std::false_type {}; template struct HasGetCount().GetCount()), size_t>>> : std::true_type {}; } // namespace intrusive_shared_ptr_internal // `IntrusiveSharedPtr` implements shared ownership of an object of type `T`. // It can also be empty, with the pointer being `nullptr`. // // The actual object can be of a subtype of `T`, as long as `T::Unref()` // correctly deletes the object in such a case, which typically requires that // `T` has a virtual destructor. // // `T` maintains its own reference count, e.g. as a member of type `RefCount`. // `T` should support: // // ``` // // Increments the reference count of `*this`. // void Ref() const; // // // Decrements the reference count of `*this`. Deletes `this` when the // // reference count reaches 0. // void Unref() const; // // // Returns `true` if there is only one owner of the object. // // // // This can be used to check if the object may be modified. // // // // Optional. Needed for `IntrusiveSharedPtr::IsUnique()`. // bool HasUniqueOwner() const; // ``` // // Compared to `std::shared_ptr`, `IntrusiveSharedPtr` supports `IsUnique()`, // and has a smaller memory overhead (the pointer has 1 word instead of 2, the // object typically has 1 word of overhead instead of 3). OTOH it requires // cooperation from `T`, and has fewer features, e.g. no aliasing constructor, // no weak pointers. 
// // Compared to `SharedPtr`, `IntrusiveSharedPtr` is harder to use, because // it requires the object to maintain its own reference count. OTOH // `IntrusiveSharedPtr` supports custom allocation and deallocation, and // conversion to an `IntrusiveSharedPtr` to a non-leftmost or virtual base // class. Prefer `SharedPtr` unless `IntrusiveSharedPtr` is needed. template class ABSL_ATTRIBUTE_TRIVIAL_ABI ABSL_NULLABILITY_COMPATIBLE IntrusiveSharedPtr : public WithEqual> { public: // Creates an empty `IntrusiveSharedPtr`. constexpr IntrusiveSharedPtr() = default; /*implicit*/ constexpr IntrusiveSharedPtr(std::nullptr_t) noexcept {} IntrusiveSharedPtr& operator=(std::nullptr_t) { Reset(); return *this; } // Creates an `IntrusiveSharedPtr` holding `ptr`. // // Takes ownership of `ptr` unless the second parameter is `kShareOwnership`. explicit IntrusiveSharedPtr(T* ptr ABSL_ATTRIBUTE_LIFETIME_BOUND, PassOwnership = kPassOwnership) noexcept : ptr_(ptr) {} explicit IntrusiveSharedPtr(T* ptr ABSL_ATTRIBUTE_LIFETIME_BOUND, ShareOwnership) noexcept : ptr_(Ref(ptr)) {} // Creates an `IntrusiveSharedPtr` holding a constructed value. // // The object is constructed with `new`, which means that `T::Unref()` should // delete the object with `delete this`. explicit IntrusiveSharedPtr(Initializer value) : ptr_(std::move(value)) {} // Creates an `IntrusiveSharedPtr` holding a constructed value of a compatible // type. // // The object is constructed with `new`, which means that `T::Unref()` should // delete the object with `delete this`. template *, T*>, int> = 0> explicit IntrusiveSharedPtr(SubInitializer&& value) : ptr_(Initializer>( std::forward(value))) {} // Converts from an `IntrusiveSharedPtr` with a compatible type. 
template , int> = 0> /*implicit*/ IntrusiveSharedPtr(const IntrusiveSharedPtr& that) noexcept : ptr_(Ref(that.ptr_.get())) {} template , int> = 0> IntrusiveSharedPtr& operator=(const IntrusiveSharedPtr& that) noexcept { ptr_.reset(Ref(that.ptr_.get())); return *this; } // Converts from an `IntrusiveSharedPtr` with a compatible type. // // The source `IntrusiveSharedPtr` is left empty. template , int> = 0> /*implicit*/ IntrusiveSharedPtr(IntrusiveSharedPtr&& that) noexcept : ptr_(std::move(that).ptr_) {} template , int> = 0> IntrusiveSharedPtr& operator=(IntrusiveSharedPtr&& that) noexcept { ptr_.reset(std::move(that).ptr_); return *this; } IntrusiveSharedPtr(const IntrusiveSharedPtr& that) noexcept : ptr_(Ref(that.ptr_.get())) {} IntrusiveSharedPtr& operator=(const IntrusiveSharedPtr& that) noexcept { ptr_.reset(Ref(that.ptr_.get())); return *this; } // The source `IntrusiveSharedPtr` is left empty. IntrusiveSharedPtr(IntrusiveSharedPtr&& that) = default; IntrusiveSharedPtr& operator=(IntrusiveSharedPtr&& that) = default; // Replaces the object, or makes `*this` empty if `ptr == nullptr`. // // Takes ownership of `ptr` unless the second parameter is `kShareOwnership`. // // The old object, if any, is destroyed afterwards. ABSL_ATTRIBUTE_REINITIALIZES void Reset(T* ptr = nullptr, PassOwnership = kPassOwnership) { ptr_.reset(ptr); } ABSL_ATTRIBUTE_REINITIALIZES void Reset(T* ptr, ShareOwnership) { ptr_.reset(Ref(ptr)); } // Replaces the object with a constructed value. // // The old object, if any, is destroyed afterwards. // // The object is constructed with `new`, which means that `T::Unref()` should // delete the object with `delete this`. // // If `T` supports `HasUniqueOwner()` and `*this` is the only owner of an // object known to have the same move-assignable type, the existing object is // assigned or reset instead of allocating and constructing a new object. 
ABSL_ATTRIBUTE_REINITIALIZES void Reset(Initializer value) { ResetImpl(std::move(value)); } // Replaces the object with a constructed value of a compatible type. // // The old object, if any, is destroyed afterwards. // // The object is constructed with `new`, which means that `T::Unref()` should // delete the object with `delete this`. template *, T*>, int> = 0> ABSL_ATTRIBUTE_REINITIALIZES void Reset(SubInitializer&& value) { ptr_ = Initializer>( std::forward(value)); } // Returns `true` if `*this` is the only owner of the object. // // This can be used to check if the object may be modified (in contrast to // `std::shared_ptr::unique()`). // // If `*this` is empty, returns `false`. // // Supported if `T` supports `HasUniqueOwner()`. template ::value, int> = 0> bool IsUnique() const { return ptr_ != nullptr && ptr_->HasUniqueOwner(); } // Returns the current reference count. // // If the `IntrusiveSharedPtr` is accessed by multiple threads, this is a // snapshot of the count which may change asynchronously, hence usage of // `GetRefCount()` should be limited to cases not important for correctness, // like producing debugging output. // // The reference count can be reliably compared against 1 with `IsUnique()`. // // Supported if `T` supports `GetCount()`. template ::value, int> = 0> size_t GetRefCount() const { if (ptr_ == nullptr) return 0; return ptr_->GetRefCount(); } // Returns the pointer. T* get() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return ptr_.get(); } // Dereferences the pointer. T& operator*() const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT_NE(ptr_, nullptr) << "Failed precondition of IntrusiveSharedPtr::operator*: null pointer"; return *ptr_; } T* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { RIEGELI_ASSERT_NE(ptr_, nullptr) << "Failed precondition of IntrusiveSharedPtr::operator->: null " "pointer"; return ptr_.get(); } // Returns the pointer. This `IntrusiveSharedPtr` is left empty. 
T* Release() { return ptr_.release(); } template friend bool operator==(const IntrusiveSharedPtr& a, const IntrusiveSharedPtr& b) { return a.get() == b.get(); } friend bool operator==(const IntrusiveSharedPtr& a, std::nullptr_t) { return a.get() == nullptr; } // Indicates support for: // * `ExternalRef(const IntrusiveSharedPtr&, substr)` // * `ExternalRef(IntrusiveSharedPtr&&, substr)` friend void RiegeliSupportsExternalRef(const IntrusiveSharedPtr*) {} // Supports `ExternalRef`. friend ExternalStorage RiegeliToExternalStorage(IntrusiveSharedPtr* self) { return ExternalStorage(const_cast*>(self->Release()), [](void* ptr) { if (ptr != nullptr) static_cast(ptr)->Unref(); }); } // Supports `riegeli::Debug()`. template friend void RiegeliDebug(const IntrusiveSharedPtr& src, DebugStream& dest) { dest.Debug(src.get()); } // Supports `MemoryEstimator`. template friend void RiegeliRegisterSubobjects(const IntrusiveSharedPtr* self, MemoryEstimator& memory_estimator) { if (memory_estimator.RegisterNode(self->get())) { memory_estimator.RegisterDynamicObject(self->get()); } } private: // For converting from a `SharedPtr` with a compatible type. template friend class IntrusiveSharedPtr; using pointer = T*; // For `ABSL_NULLABILITY_COMPATIBLE`. 
struct Unrefer { void operator()(T* ptr) const { ptr->Unref(); } }; template static SubT* Ref(SubT* ptr) { if (ptr != nullptr) ptr->Ref(); return ptr; } template struct IsAssignable : std::conjunction< intrusive_shared_ptr_internal::HasHasUniqueOwner, std::disjunction< std::negation>, std::is_final>, std::is_move_assignable> {}; void ResetImpl(Initializer value) { if constexpr (IsAssignable::value) { if (IsUnique()) { *ptr_ = std::move(value); return; } } ptr_ = std::move(value); } std::unique_ptr ptr_; }; template explicit IntrusiveSharedPtr(T* ptr, PassOwnership = kPassOwnership) -> IntrusiveSharedPtr; template explicit IntrusiveSharedPtr(T* ptr, ShareOwnership) -> IntrusiveSharedPtr; template , int> = 0> explicit IntrusiveSharedPtr(T&& value) -> IntrusiveSharedPtr>; } // namespace riegeli #endif // RIEGELI_BASE_INTRUSIVE_SHARED_PTR_H_ ================================================ FILE: riegeli/base/invoker.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_INVOKER_H_ #define RIEGELI_BASE_INVOKER_H_ #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { template class InvokerType; namespace invoker_internal { template class InvokerBase : public ConditionallyAssignable< std::conjunction_v>...>> { protected: template using Result = std::invoke_result_t; template using ConstResult = std::invoke_result_t; public: // Constructs `InvokerType` from `function` convertible to `Function` and // `args...` convertible to `Args...`. template < typename SrcFunction, typename... SrcArgs, std::enable_if_t< std::conjunction_v, std::is_invocable, std::is_convertible, std::is_convertible...>, int> = 0> /*implicit*/ InvokerBase(SrcFunction&& function, SrcArgs&&... args) : function_(std::forward(function)), args_(std::forward(args)...) {} InvokerBase(InvokerBase&& that) = default; InvokerBase& operator=(InvokerBase&& that) = default; InvokerBase(const InvokerBase& that) = default; InvokerBase& operator=(const InvokerBase& that) = default; // Invokes the function. // // Usually conversion to the result of invocation is preferred because it can // avoid creating a temporary if the context accepts an arbitrary type // convertible to the result of invocation. An explicit `Invoke()` call can // force construction right away while avoiding specifying the full result // type. template Result Invoke() && { return std::apply(std::forward(function_), std::move(args_)); } template ConstResult Invoke() const& { return std::apply(function_, args_); } // Extracts the function. Function& function() & { return function_; } const Function& function() const& { return function_; } Function&& function() && { return std::move(function_); } const Function&& function() const&& { return std::move(function_); } // Extracts the given argument. 
template = 0> std::tuple_element_t>& arg() & { return std::get(args_); } template = 0> const std::tuple_element_t>& arg() const& { return std::get(args_); } template = 0> std::tuple_element_t>& arg() && { return std::get(std::move(args_)); } template = 0> const std::tuple_element_t>& arg() const&& { return std::get(std::move(args_)); } private: ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS Function function_; ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS std::tuple args_; }; template class InvokerConditionalConversion : public InvokerBase { private: using Result = typename InvokerConditionalConversion::InvokerBase::template Result<>; using ConstResult = typename InvokerConditionalConversion::InvokerBase:: template ConstResult<>; public: using InvokerConditionalConversion::InvokerBase::InvokerBase; InvokerConditionalConversion(InvokerConditionalConversion&& that) = default; InvokerConditionalConversion& operator=(InvokerConditionalConversion&& that) = default; InvokerConditionalConversion(const InvokerConditionalConversion& that) = default; InvokerConditionalConversion& operator=( const InvokerConditionalConversion& that) = default; // Invokes the function. /*implicit*/ operator Result() && { return std::move(*this).Invoke(); } // Invokes the function. /*implicit*/ operator ConstResult() const& { return this->Invoke(); } }; // Disable const functionality when the const function is not invocable with the // const arguments. 
template class InvokerConditionalConversion< std::enable_if_t, std::negation>>>, Function, Args...> : public InvokerBase { private: using Result = typename InvokerConditionalConversion::InvokerBase::template Result<>; public: using InvokerConditionalConversion::InvokerBase::InvokerBase; InvokerConditionalConversion(InvokerConditionalConversion&& that) = default; InvokerConditionalConversion& operator=(InvokerConditionalConversion&& that) = default; InvokerConditionalConversion(const InvokerConditionalConversion& that) = default; InvokerConditionalConversion& operator=( const InvokerConditionalConversion& that) = default; // Invokes the function. /*implicit*/ operator Result() && { return std::move(*this).Invoke(); } }; // Disable functionality when the function is not invocable with the arguments. template class InvokerConditionalConversion< std::enable_if_t>, Function, Args...> : public InvokerBase { public: using InvokerConditionalConversion::InvokerBase::InvokerBase; InvokerConditionalConversion(InvokerConditionalConversion&& that) = default; InvokerConditionalConversion& operator=(InvokerConditionalConversion&& that) = default; InvokerConditionalConversion(const InvokerConditionalConversion& that) = default; InvokerConditionalConversion& operator=( const InvokerConditionalConversion& that) = default; }; } // namespace invoker_internal // `InvokerType`, usually made with // `riegeli::Invoker(function, args...)`, packs a function together with its // arguments. `InvokerType` is convertible to // `Initializer` when the result of `Function` is convertible to `T`. // // This allows the function taking `Initializer` to construct the object // in-place, avoiding constructing a temporary and moving from it. // // `InvokerType` complements `MakerType` by extending constructors with factory // functions. 
// // The function and arguments are interpreted as by `std::invoke()`: the // function can also be a member pointer, in which case the first argument is // the target reference, reference wrapper, or pointer. template class InvokerType : public invoker_internal::InvokerConditionalConversion { private: template using Result = typename InvokerType::InvokerBase::template Result; template using ConstResult = typename InvokerType::InvokerBase::template ConstResult< DependentFunction>; public: using InvokerType::InvokerConditionalConversion::InvokerConditionalConversion; InvokerType(InvokerType&& that) = default; InvokerType& operator=(InvokerType&& that) = default; InvokerType(const InvokerType& that) = default; InvokerType& operator=(const InvokerType& that) = default; // Invokes the function and stores `std::decay_t` of the result of invocation // on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. // // For a non-default-constructed deleter, use `UniquePtr(deleter)`. template < typename Target, typename Deleter, typename DependentFunction = Function, std::enable_if_t< std::conjunction_v< IsConstructibleFromResult>, Result>, std::is_convertible>*, Target*>>, int> = 0> /*implicit*/ operator std::unique_ptr() && { return std::move(*this).template UniquePtr(); } template < typename Target, typename Deleter, typename DependentFunction = Function, std::enable_if_t< std::conjunction_v< IsConstructibleFromResult< std::decay_t>, ConstResult>, std::is_convertible>*, Target*>>, int> = 0> /*implicit*/ operator std::unique_ptr() const& { return UniquePtr(); } // Invokes the function and stores `std::decay_t` of the result of invocation // on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. // // Usually conversion to `std::unique_ptr` is preferred because it leads to // simpler source code. 
An explicit `UniquePtr()` call can force construction // right away while avoiding writing the full target type, and it allows to // use a non-default-constructed deleter. // // The `default_deleter` template parameter lets `UniquePtr()` with an // explicit template argument unambiguously call another overload of // `UniquePtr()`. template >, Result>::value, int> = 0> std::unique_ptr>> UniquePtr() && { return std::unique_ptr>>( new std::decay_t>(std::move(*this))); } template >, ConstResult>::value, int> = 0> std::unique_ptr>> UniquePtr() const& { return std::unique_ptr>>( new std::decay_t>(*this)); } template >, Result>::value, int> = 0> std::unique_ptr>, Deleter> UniquePtr() && { return std::unique_ptr>, Deleter>( new std::decay_t>(std::move(*this))); } template >, ConstResult>::value, int> = 0> std::unique_ptr>, Deleter> UniquePtr() const& { return std::unique_ptr>, Deleter>( new std::decay_t>(*this)); } template >, Result>::value, int> = 0> std::unique_ptr>, Deleter> UniquePtr( Deleter&& deleter) && { return std::unique_ptr>, Deleter>( new std::decay_t>(std::move(*this)), std::forward(deleter)); } template >, ConstResult>::value, int> = 0> std::unique_ptr>, Deleter> UniquePtr(Deleter&& deleter) const& { return std::unique_ptr>, Deleter>( new std::decay_t>(*this), std::forward(deleter)); } }; template explicit InvokerType(Function&&, Args&&...) -> InvokerType, std::decay_t...>; // `InvokerTargetRef::type` and `InvokerTargetRefT` deduce the appropriate // target type of a possibly const-qualified `InvokerType` // or its reference, such that `T` is convertible to `InvokerTargetRefT`, // and `T::Invoke()` returns `InvokerTargetRefT`. // // They are undefined when the invoker is not usable in the given const and // reference context. 
template struct InvokerTargetRef; template struct InvokerTargetRef> : std::invoke_result {}; template struct InvokerTargetRef> : std::invoke_result {}; template struct InvokerTargetRef : InvokerTargetRef {}; template struct InvokerTargetRef : InvokerTargetRef {}; template using InvokerTargetRefT = typename InvokerTargetRef::type; // `InvokerTarget::type` and `InvokerTargetT` deduce the appropriate // target type of a possibly const-qualified `InvokerType` // or its reference, decayed to its value type, such that `T` is convertible to // `InvokerTargetT`. // // This makes the result independent from whether the function returns a value // or a reference, if the result needs to be stored for later. // // They are undefined when the invoker is not usable in the given const and // reference context. namespace invoker_internal { template struct InvokerTargetImpl { // No `type` member when the invoker is not usable in the given const and // reference context. }; template struct InvokerTargetImpl>> : std::decay> {}; } // namespace invoker_internal template struct InvokerTarget : invoker_internal::InvokerTargetImpl {}; template using InvokerTargetT = typename InvokerTarget::type; // `riegeli::Invoker(function, args...)` returns // `InvokerType` which packs a function together with its // arguments. `InvokerType` is convertible to // `Initializer` when the result of `Function` is convertible to `T`. // // This allows the function taking `Initializer` to construct the object // in-place, avoiding constructing a temporary and moving from it. // // `riegeli::Invoker()` complements `riegeli::Maker()` by extending constructors // with factory functions. // // The function and arguments are interpreted as by `std::invoke()`: the // function can also be a member pointer, in which case the first argument is // the target reference, reference wrapper, or pointer. 
// // `riegeli::Invoker(function, args...)` does not own `function` or `args`, even // if they involve temporaries, hence it should be used only as a parameter of a // function or constructor, so that the temporaries outlive its usage. For // storing a `InvokerType` in a variable or returning it from a function, use // `riegeli::OwningInvoker(function, args...)` or construct `InvokerType` // directly. template , int> = 0> inline InvokerType Invoker( Function&& function ABSL_ATTRIBUTE_LIFETIME_BOUND, Args&&... args ABSL_ATTRIBUTE_LIFETIME_BOUND) { return {std::forward(function), std::forward(args)...}; } // `riegeli::OwningInvoker()` is like `riegeli::Invoker()`, but the arguments // are stored by value instead of by reference. This is useful for storing the // `InvokerType` in a variable or returning it from a function. // // If a particular argument is heavy and its lifetime is sufficient for storing // it by reference, wrap it in `std::ref()` or `std::cref()`. template , unwrap_ref_decay_t...>, int> = 0> inline InvokerType, unwrap_ref_decay_t...> OwningInvoker(Function&& function, Args&&... args) { return {std::forward(function), std::forward(args)...}; } } // namespace riegeli #endif // RIEGELI_BASE_INVOKER_H_ ================================================ FILE: riegeli/base/iterable.h ================================================ // Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_ITERABLE_H_ #define RIEGELI_BASE_ITERABLE_H_ #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { namespace iterable_internal { // Let unqualified `begin()` below refer either to a function named `begin()` // found via ADL or to `std::begin()`, as appropriate for the given iterable. // This is done in a separate namespace to avoid defining `riegeli::begin`. // Same for `end()` and `size()`. using std::begin; using std::end; using std::size; template struct IsIterable : std::false_type {}; template struct IsIterable()))>> : std::true_type {}; template struct IteratorType {}; template struct IteratorType::value>> : type_identity()))> {}; template struct ElementTypeInternal {}; template struct ElementTypeInternal::value>> : type_identity()))> {}; template struct ElementTypeInternal::value>> : type_identity())))> {}; template struct IterableHasSize : std::false_type {}; template struct IterableHasSize< Iterable, std::enable_if_t())), size_t>>> : std::true_type {}; } // namespace iterable_internal // `IsIterable::value` is `true` when `T` is iterable, supporting // `begin(iterable)` after `using std::begin;` (not all details are verified). using iterable_internal::IsIterable; // `IteratorTypeT::type` and `IteratorTypeT` is the type of // iterators over `Iterable`. using iterable_internal::IteratorType; template using IteratorTypeT = typename IteratorType::type; // `HasMovableElements::value` is `true` when moving (rather than // copying) out of elements of `Iterable` is safe. This is the case when // `Iterable` owns its elements, i.e. it is not a view container like // `absl::Span`, and it is not an lvalue reference. // // By default an iterable is detected as owning its elements when iterating over // `Iterable` and `const Iterable` yields elements of different types. 
This // also catches cases where `Iterable` always yields const elements or is const // itself. In these cases moving would be equivalent to copying, and trying to // move would just yield unnecessarily separate template instantiations. // // To customize that for a class `Iterable`, define a free function // `friend constexpr bool RiegeliHasMovableElements(Iterable*)` as a friend of // `Iterable` inside class definition or in the same namespace as `Iterable`, // so that it can be found via ADL. // // The argument of `RiegeliHasMovableElements(Iterable*)` is always a null // pointer, used to choose the right overload based on the type. template struct HasMovableElements : std::negation::type, typename iterable_internal::ElementTypeInternal< const Iterable>::type>> {}; template struct HasMovableElements< Iterable, std::enable_if_t>, std::is_convertible(nullptr))), bool>>>> : std::bool_constant(nullptr))> {}; template struct HasMovableElements : std::false_type {}; template struct HasMovableElements : HasMovableElements {}; // `MaybeMakeMoveIterator(iterator)` is // `std::make_move_iterator(iterator)` or `iterator`, depending on whether // moving out of elements of `Iterable` is safe. template inline auto MaybeMakeMoveIterator(Iterator iterator) { if constexpr (HasMovableElements::value) { return std::move_iterator(std::move(iterator)); } else { return iterator; } } // `ElementType::type` and `ElementTypeT` is the type of // elements yielded by iterating over `Iterable`. // // The result is a reference, except when iteration yields temporary objects. // If moving out of elements of `Iterable` is safe, this is an rvalue reference. template struct ElementType {}; template struct ElementType::value>> : iterable_internal::ElementTypeInternal< Iterable, HasMovableElements::value> {}; template using ElementTypeT = typename ElementType::type; // `IsIterableOf::value` is `true` when iterating over // `Iterable` yields elements convertible to `Element`. 
template struct IsIterableOf : std::false_type {}; template struct IsIterableOf::value>> : std::is_convertible, Element> {}; // `IsIterableOfPairs::value` is `true` when iterating // over `Iterable` yields pairs or pair proxies with keys convertible to `Key` // and values convertible to `Value`. template struct IsIterableOfPairs : std::false_type {}; template struct IsIterableOfPairs< Iterable, Key, Value, std::enable_if_t, std::is_convertible< decltype(std::declval>().first), Key>, std::is_convertible< decltype(std::declval>().second), Value>>>> : std::true_type {}; // `IsIterableOfPairsWithAssignableValues::value` // is `true` when iterating over `Iterable` yields pair proxies with keys // convertible to `Key` and values assignable from `Value`. template struct IsIterableOfPairsWithAssignableValues : std::false_type {}; template struct IsIterableOfPairsWithAssignableValues< Iterable, Key, Value, std::enable_if_t, std::is_convertible< decltype(std::declval>().first), Key>, std::is_assignable< decltype(std::declval>().second), Value>>>> : std::true_type {}; // TODO: Use `typename std::iterator_traits::iterator_concept` // instead when C++20 is unconditionally available. namespace iterable_internal { template struct IteratorConcept : type_identity< typename std::iterator_traits::iterator_category> {}; template struct IteratorConcept< Iterator, std::void_t::iterator_concept>> : type_identity::iterator_concept> { }; } // namespace iterable_internal // `IsForwardIterable::value` is `true` when the iterator over // `Iterable` is a forward iterator, in particular when it can be iterated // over multiple times. template struct IsForwardIterable : std::false_type {}; template struct IsForwardIterable< Iterable, std::enable_if_t, std::is_convertible>::type, std::forward_iterator_tag>>>> : std::true_type {}; // `IsRandomAccessIterable::value` is `true` when the iterator over // `Iterable` is a random access iterator. 
template struct IsRandomAccessIterable : std::false_type {}; template struct IsRandomAccessIterable< Iterable, std::enable_if_t, std::is_convertible>::type, std::random_access_iterator_tag>>>> : std::true_type {}; // `IterableHasSize::value` is `true` when `Iterable` supports // `size(iterable)` after `using std::size;`. using iterable_internal::IterableHasSize; // Represents the result of `operator->` if `operator*` returns a proxy // object rather than a true reference. In particular this can be used as // `iterator::pointer` if `iterator::reference` is not a true reference. template class ArrowProxy { public: explicit ArrowProxy(Reference ref) : ref_(std::move(ref)) {} ArrowProxy(const ArrowProxy& that) = default; ArrowProxy& operator=(const ArrowProxy& that) = default; ArrowProxy(ArrowProxy&& that) noexcept = default; ArrowProxy& operator=(ArrowProxy&& that) noexcept = default; const Reference* operator->() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return &ref_; } private: Reference ref_; }; // A pair-like type to be used as `iterator::reference` for iterators over a map // with a separate storage for keys and values. In C++20 this lets the iterator // satisfy `std::indirectly_readable`. // // It extends `std::pair` with conversions from `std::pair&` // and with `std::basic_common_reference` specializations. // // Since C++23, `std::pair` can be used directly instead. 
template class ReferencePair : public std::pair { public: using ReferencePair::pair::pair; template < class U1, class U2, std::enable_if_t< std::conjunction_v< std::is_constructible, std::is_constructible, std::negation, std::is_convertible>>>, int> = 0> explicit constexpr ReferencePair(std::pair& p) : ReferencePair::pair(p.first, p.second) {} template , std::is_convertible>, int> = 0> /*implicit*/ constexpr ReferencePair(std::pair& p) : ReferencePair::pair(p.first, p.second) {} }; } // namespace riegeli #if __cplusplus >= 202002L template class TQual, template class UQual> struct std::basic_common_reference, std::pair, TQual, UQual> { using type = riegeli::ReferencePair, UQual>, std::common_reference_t, UQual>>; }; template class TQual, template class UQual> struct std::basic_common_reference< std::pair, riegeli::ReferencePair, TQual, UQual> { using type = riegeli::ReferencePair, UQual>, std::common_reference_t, UQual>>; }; template class TQual, template class UQual> struct std::basic_common_reference, riegeli::ReferencePair, TQual, UQual> { using type = riegeli::ReferencePair, UQual>, std::common_reference_t, UQual>>; }; #endif #endif // RIEGELI_BASE_ITERABLE_H_ ================================================ FILE: riegeli/base/maker.h ================================================ // Copyright 2024 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
#ifndef RIEGELI_BASE_MAKER_H_ #define RIEGELI_BASE_MAKER_H_ #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/nullability.h" #include "riegeli/base/reset.h" #include "riegeli/base/temporary_storage.h" #include "riegeli/base/type_traits.h" ABSL_POINTERS_DEFAULT_NONNULL namespace riegeli { // `MakerType`, usually made with `riegeli::Maker(args...)`, packs // constructor arguments for a yet unspecified type, which will be specified by // the caller. `MakerType` is convertible to `Initializer` for any // `T` which can be constructed from `Args...`. // // This allows the function taking `Initializer` to construct the object // in-place, avoiding constructing a temporary and moving from it. // // In contrast to `MakerTypeFor`, `MakerType` requires the // caller to know `T`. // // `InvokerType` complements `MakerType` by extending constructors with factory // functions. template class MakerType : public ConditionallyAssignable< std::conjunction_v>...>> { public: // Constructs `MakerType` from `args...` convertible to `Args...`. template , std::is_convertible...>, int> = 0> /*implicit*/ MakerType(SrcArgs&&... args) : args_(std::forward(args)...) {} MakerType(MakerType&& that) = default; MakerType& operator=(MakerType&& that) = default; MakerType(const MakerType& that) = default; MakerType& operator=(const MakerType& that) = default; // Constructs the `T`. template , int> = 0> T Construct() && { return std::make_from_tuple(std::move(args_)); } template < typename T, std::enable_if_t, int> = 0> T Construct() const& { return std::make_from_tuple(args_); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports custom deleters. 
template >, std::enable_if_t< std::is_constructible_v, Args&&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr() && { return std::unique_ptr, Deleter>( new std::decay_t(std::move(*this).template Construct())); } template , Args&&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr(Deleter&& deleter) && { return std::unique_ptr, Deleter>( new std::decay_t(std::move(*this).template Construct()), std::forward(deleter)); } template < typename T, typename Deleter = std::default_delete>, std::enable_if_t, const Args&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr() const& { return std::unique_ptr, Deleter>( new std::decay_t(Construct())); } template < typename T, typename Deleter, std::enable_if_t, const Args&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr( Deleter&& deleter) const& { return std::unique_ptr, Deleter>( new std::decay_t(Construct()), std::forward(deleter)); } // Constructs the `T` in `storage` which must outlive the returned reference. // // `Reference()` instead of `Construct()` supports `Initializer::Reference()`. // // If the `storage` argument is omitted, the result is returned by value // instead of by reference, which is a more efficient way to construct the // temporary. template , int> = 0> T Reference() && { return std::move(*this).template Construct(); } template , int> = 0> T&& Reference( TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND) && { return std::apply( [&](Args&&... args) -> T&& { return std::move(storage).emplace(std::forward(args)...); }, std::move(args_)); } template < typename T, std::enable_if_t, int> = 0> T Reference() const& { return Construct(); } template < typename T, std::enable_if_t, int> = 0> T&& Reference( TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND) const& { return std::apply( [&](const Args&... args) -> T&& { return std::move(storage).emplace(args...); }, args_); } // `riegeli::Reset(dest, MakerType)` makes `dest` equivalent to the // constructed `T`. 
This avoids constructing a temporary `T` and moving from // it. template < typename T, std::enable_if_t>, SupportsReset>, int> = 0> friend void RiegeliReset(T& dest, MakerType&& src) { std::apply( [&](Args&&... args) { riegeli::Reset(dest, std::forward(args)...); }, std::move(src.args_)); } template < typename T, std::enable_if_t>, SupportsReset>, int> = 0> friend void RiegeliReset(T& dest, const MakerType& src) { std::apply([&](const Args&... args) { riegeli::Reset(dest, args...); }, src.args_); } // Extracts the given argument. template = 0> std::tuple_element_t>& arg() & { return std::get(args_); } template = 0> const std::tuple_element_t>& arg() const& { return std::get(args_); } template = 0> std::tuple_element_t>& arg() && { return std::get(std::move(args_)); } template = 0> const std::tuple_element_t>& arg() const&& { return std::get(std::move(args_)); } private: ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS std::tuple args_; }; // `MakerTypeFor, usually made with `riegeli::Maker(args...)`, // packs constructor arguments for `T`. `MakerTypeFor` is // convertible to `Initializer`. // // This allows the function taking `Initializer` to construct the object // in-place, avoiding constructing a temporary and moving from it. // // In contrast to `MakerType`, `MakerTypeFor` allows the // caller to deduce `T`, e.g. using `TargetT`. template class MakerTypeFor : public ConditionallyAssignable< std::conjunction_v>...>> { public: // Constructs `MakerTypeFor` from `args...` convertible to `Args...`. template , std::is_constructible, std::is_convertible...>, int> = 0> /*implicit*/ MakerTypeFor(SrcArgs&&... args) : maker_(std::forward(args)...) {} MakerTypeFor(MakerTypeFor&& that) = default; MakerTypeFor& operator=(MakerTypeFor&& that) = default; MakerTypeFor(const MakerTypeFor& that) = default; MakerTypeFor& operator=(const MakerTypeFor& that) = default; // Constructs the `T`. 
template < typename DependentT = T, std::enable_if_t, int> = 0> /*implicit*/ operator T() && { return std::move(*this).Construct(); } template , int> = 0> /*implicit*/ operator T() const& { return Construct(); } // Constructs the `T`. // // Usually conversion to `T` is preferred because it can avoid creating a // temporary if the context accepts an arbitrary type convertible to `T`. // An explicit `Construct()` call can force construction right away while // avoiding specifying the full target type. template < typename DependentT = T, std::enable_if_t, int> = 0> T Construct() && { return std::move(*this).maker().template Construct(); } template , int> = 0> T Construct() const& { return this->maker().template Construct(); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports deducing class template // arguments and custom deleters. // // For a non-default-constructed deleter, use `UniquePtr(deleter)`. template < typename Target, typename Deleter, std::enable_if_t< std::conjunction_v, Args&&...>, std::is_convertible*, Target*>>, int> = 0> /*implicit*/ operator std::unique_ptr() && { return std::move(*this).template UniquePtr(); } template , const Args&...>, std::is_convertible*, Target*>>, int> = 0> /*implicit*/ operator std::unique_ptr() const& { return UniquePtr(); } // Constructs the `std::decay_t` on the heap. // // In contrast to `std::make_unique()`, this supports deducing class template // arguments and custom deleters. // // Usually conversion to `std::unique_ptr` is preferred because it leads to // simpler source code. An explicit `UniquePtr()` call can force construction // right away while avoiding writing the full target type, and it allows to // use a non-default-constructed deleter. 
template >, typename DependentT = T, std::enable_if_t< std::is_constructible_v, Args&&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr() && { return std::move(*this).maker().template UniquePtr(); } template , Args&&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr(Deleter&& deleter) && { return std::move(*this).maker().template UniquePtr( std::forward(deleter)); } template >, typename DependentT = T, std::enable_if_t, const Args&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr() const& { return this->maker().template UniquePtr(); } template , const Args&...>, int> = 0> std::unique_ptr, Deleter> UniquePtr( Deleter&& deleter) const& { return this->maker().template UniquePtr( std::forward(deleter)); } // Constructs the `T` in `storage` which must outlive the returned reference. // // `Reference()` instead of conversion to `T` or `Construct()` supports // `Initializer::Reference()`. // // If the `storage` argument is omitted, the result is returned by value // instead of by reference, which is a more efficient way to construct the // temporary. template < typename DependentT = T, std::enable_if_t, int> = 0> T Reference() && { return std::move(*this).maker().template Reference(); } template < typename DependentT = T, std::enable_if_t, int> = 0> T&& Reference( TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND) && { return std::move(*this).maker().template Reference(std::move(storage)); } template , int> = 0> T Reference() const& { return this->maker().template Reference(); } template , int> = 0> T&& Reference( TemporaryStorage&& storage ABSL_ATTRIBUTE_LIFETIME_BOUND) const& { return this->maker().template Reference(std::move(storage)); } // `riegeli::Reset(dest, MakerTypeFor)` makes `dest` equivalent to the // constructed `T`. This avoids constructing a temporary `T` and moving from // it. 
template >, SupportsReset>, int> = 0> friend void RiegeliReset(T& dest, MakerTypeFor&& src) { riegeli::Reset(dest, std::move(src).maker()); } template >, SupportsReset>, int> = 0> friend void RiegeliReset(T& dest, const MakerTypeFor& src) { riegeli::Reset(dest, src.maker()); } // Extracts the given argument. template = 0> std::tuple_element_t>& arg() & { return maker().template arg(); } template = 0> const std::tuple_element_t>& arg() const& { return maker().template arg(); } template = 0> std::tuple_element_t>& arg() && { return std::move(*this).maker().template arg(); } template = 0> const std::tuple_element_t>& arg() const&& { return std::move(*this).maker().template arg(); } // Extracts the corresponding `MakerType` which does not specify `T`. // // This is useful for handling `MakerType` and `MakerTypeFor` generically. MakerType& maker() & { return maker_; } const MakerType& maker() const& { return maker_; } MakerType&& maker() && { return std::move(maker_); } const MakerType&& maker() const&& { return std::move(maker_); } private: ABSL_ATTRIBUTE_NO_UNIQUE_ADDRESS MakerType maker_; }; // `MakerTarget::type` and `MakerTargetT` deduce the appropriate target // type of a possibly const-qualified `MakerTypeFor` or its // reference, such that `T` is convertible to `MakerTargetT`, and // `T::Construct()` returns `MakerTargetT`. // // They are undefined when the maker is not usable in the given const and // reference context. namespace maker_internal { template struct MakerTargetImpl { // No `type` member when the maker is not usable in the given const and // reference context. 
}; template struct MakerTargetImpl< MakerTypeFor, std::enable_if_t>> { using type = Target; }; template struct MakerTargetImpl< const MakerTypeFor, std::enable_if_t>> { using type = Target; }; } // namespace maker_internal template struct MakerTarget : maker_internal::MakerTargetImpl {}; template struct MakerTarget : maker_internal::MakerTargetImpl {}; template struct MakerTarget : maker_internal::MakerTargetImpl {}; template using MakerTargetT = typename MakerTarget::type; // `riegeli::Maker(args...)` returns `MakerType` which packs // constructor arguments for a yet unspecified type, which will be specified by // the caller. `riegeli::Maker(args...)` is convertible to `Initializer` for // any `T` which can be constructed from `Args...`. // // This allows the function taking `Initializer` to construct the object // in-place, avoiding constructing a temporary and moving from it. // // In contrast to `riegeli::Maker(args...)`, `riegeli::Maker(args...)` // requires the caller to know `T`. Prefer // `Template(riegeli::Maker(args...))` over // `Template(riegeli::Maker(args...))` if CTAD of `Template` can be used. // // `riegeli::Invoker()` complements `riegeli::Maker()` by extending constructors // with factory functions. // // `riegeli::Maker(args...)` does not own `args`, even if they involve // temporaries, hence it should be used only as a parameter of a function or // constructor, so that the temporaries outlive its usage. For storing a // `MakerType` in a variable or returning it from a function, use // `riegeli::OwningMaker(args...)` or construct `MakerType` directly. // // The `generic` template parameter lets `riegeli::Maker()` with an explicit // template argument unambiguously call another overload of `riegeli::Maker()`. template MakerType Maker(Args&&... args ABSL_ATTRIBUTE_LIFETIME_BOUND) { return {std::forward(args)...}; } // `riegeli::Maker(args...)` returns `MakerTypeFor` which packs // constructor arguments for `T`. 
`riegeli::Maker(args...)` is convertible to // `Initializer`. // // This allows the function taking `Initializer` to construct the object // in-place, avoiding constructing a temporary and moving from it. // // `riegeli::Invoker()` complements `riegeli::Maker()` by extending // constructors with factory functions. // // In contrast to `riegeli::Maker(args...)`, `riegeli::Maker(args...)` allows // the caller to deduce `T`, e.g. using `TargetT`. // // `riegeli::Maker(args...)` does not own `args`, even if they involve // temporaries, hence it should be used only as a parameter of a function or // constructor, so that the temporaries outlive its usage. For storing a // `MakerTypeFor` in a variable or returning it from a function, use // `riegeli::OwningMaker(args...)` or construct `MakerTypeFor` directly. template , int> = 0> MakerTypeFor Maker(Args&&... args ABSL_ATTRIBUTE_LIFETIME_BOUND) { return {std::forward(args)...}; } // `riegeli::Maker